413aa1d0oNP8HXLvfPuMe6cSroUfSA patches/linux-2.6.9/agpgart.patch
3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile
40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Rules.mk
+4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile
+4209033ewLAHdhGrT_2jo3Gb_5bDcA tools/blktap/README
+4209033eX_Xw94wHaOCtnU9nOAtSJA tools/blktap/blkaio.c
+4209033egwf6LDxM2hbaqi9rRdZy4A tools/blktap/blkaiolib.c
+4209033f9yELLK85Ipo2oKjr3ickgQ tools/blktap/blkaiolib.h
+4209033fL9LcSI6LXrIp5O4axbUBLg tools/blktap/blkcow.c
+4209033fUDlFGZreIyZHdP7h7yfvuQ tools/blktap/blkcowgnbd.c
+4209033fCgZzLeMOwNBFmsp99x58ZQ tools/blktap/blkcowimg.c
+4209033frfXH6oOi9AvRz08PPAndNA tools/blktap/blkcowlib.c
+4209033fhFd_y2go9HgCF395A35xJg tools/blktap/blkcowlib.h
+4209033fHgtGpb_K16_xC9CpkjNZLw tools/blktap/blkdump.c
+4209033fm61CZG1RyKDW75V-eTZ9fg tools/blktap/blkgnbd.c
+4209033fVfa-R6MFgGcmsQHTDna4PA tools/blktap/blkgnbdlib.c
+4209033fIgDQbaHwHStHhPEDTtbqsA tools/blktap/blkgnbdlib.h
+4209033figp5JRsKsXY8rw4keRumkg tools/blktap/blkimg.c
+42090340V-8HKGlr00SyJGsE5jXC3A tools/blktap/blkimglib.c
+42090340c7pQbh0Km8zLcEqPd_3zIg tools/blktap/blkimglib.h
+42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h
+42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c
+42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h
+42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile
+42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c
+42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c
+42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h
4124b307nRyK3dhn1hAsvrY76NuV3g tools/check/Makefile
4124b307vHLUWbfpemVefmaWDcdfag tools/check/README
4124b307jt7T3CHysgl9LijNHSe1tA tools/check/check_brctl
--- /dev/null
+# Shared-library versioning: MAJOR is part of the soname, MINOR only
+# appears in the name of the real library file.
+MAJOR = 2.0
+MINOR = 0
+SONAME = libblktap.so.$(MAJOR)
+
+CC = gcc
+
+# Top of the Xen tree, and the build rules shared by all tools.
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+INCLUDES +=
+
+# Sources that make up the library itself.
+SRCS := blktaplib.c
+
+# Warnings are fatal (unused entities excepted); -O3 is left off and
+# full debug information is generated instead.
+CFLAGS += -Wall -Werror -Wno-unused
+CFLAGS += -g3 -fno-strict-aliasing
+CFLAGS += -I $(XEN_LIBXC) -I $(XEN_LIBXUTIL)
+CFLAGS += $(INCLUDES) -I.
+CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
+# Get gcc to generate the dependencies for us.
+CFLAGS += -Wp,-MD,.$(@F).d
+DEPS = .*.d
+
+OBJS = $(SRCS:.c=.o)
+
+# The development symlink, the soname symlink and the real library file.
+LIB = libblktap.so $(SONAME) $(SONAME).$(MINOR)
+
+# Default target: set up the header symlinks, build every example tap
+# program and make sure the library (and its symlinks) exist.
+# Declared phony so that a stray file called "all" cannot mask it.
+.PHONY: all
+all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio
+	$(MAKE) $(LIB)
+
+# Sparse XenLinux tree(s) found next to the Xen sources.
+LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
+
+# Populate a local xen/ include tree with symlinks to the public Xen
+# headers, the public I/O headers and the XenLinux public headers.
+mk-symlinks:
+	test -e xen/linux || mkdir -p xen/linux
+	test -e xen/io || mkdir -p xen/io
+	( cd xen >/dev/null ; ln -sf ../$(XEN_ROOT)/xen/include/public/*.h . )
+	( cd xen/io >/dev/null ; ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . )
+	( cd xen/linux >/dev/null ; ln -sf ../../$(LINUX_ROOT)/include/asm-xen/linux-public/*.h . )
+
+# Install the shared library, its two symlinks and the public header
+# below $(prefix)/usr.
+#
+# Only the real library file is copied: install(1) follows symlinks, so
+# passing $(LIB) would copy the same library three times before two of
+# the copies are replaced by the links created below.
+install: all
+	mkdir -p $(prefix)/usr/lib
+	mkdir -p $(prefix)/usr/include
+	install -m0755 libblktap.so.$(MAJOR).$(MINOR) $(prefix)/usr/lib
+	ln -sf libblktap.so.$(MAJOR).$(MINOR) \
+	       $(prefix)/usr/lib/libblktap.so.$(MAJOR)
+	ln -sf libblktap.so.$(MAJOR) $(prefix)/usr/lib/libblktap.so
+	install -m0644 blktaplib.h $(prefix)/usr/include
+
+# Remove everything the build creates: objects, libraries, packages,
+# editor backups, dependency files, the xen/ symlink tree, the tags
+# table and the example programs.
+clean:
+	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS \
+	       blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio
+
+# Build a binary RPM from rpm.spec in a scratch "staging" directory and
+# move the resulting package into the current directory.
+#
+# Each --define takes "NAME VALUE"; the macro name and its value must be
+# separated by whitespace.
+rpm: all
+	rm -rf staging
+	mkdir staging
+	mkdir staging/i386
+	rpmbuild --define "staging $$PWD/staging" --define '_builddir .' \
+	         --define "_rpmdir $$PWD/staging" -bb rpm.spec
+	mv staging/i386/*.rpm .
+	rm -rf staging
+
+# Development symlink -> soname symlink -> real library file.
+libblktap.so:
+	ln -sf libblktap.so.$(MAJOR) $@
+libblktap.so.$(MAJOR):
+	ln -sf libblktap.so.$(MAJOR).$(MINOR) $@
+
+# Link the shared library.  The libxutil search path comes from
+# $(XEN_LIBXUTIL), as in every other rule in this file, rather than
+# from a hard-coded relative directory.
+libblktap.so.$(MAJOR).$(MINOR): $(OBJS)
+	$(CC) -Wl,-soname -Wl,$(SONAME) -shared -o $@ $^ -L$(XEN_LIBXUTIL) -lxutil -lz
+
+# Example tap programs.
+#
+# Every program lists all of its own sources as prerequisites so that
+# it is relinked when any of them changes, and libraries are named
+# after the sources that use them so that linkers which resolve
+# symbols strictly left to right still find them.
+TAP_LDFLAGS = -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L.
+
+blkdump: $(LIB) blkdump.c
+	$(CC) $(CFLAGS) -o $@ blkdump.c $(TAP_LDFLAGS) -lblktap
+
+blkcowimg: $(LIB) blkcowimg.c blkcowlib.c blkimglib.c
+	$(CC) $(CFLAGS) -o $@ blkcowimg.c blkimglib.c blkcowlib.c $(TAP_LDFLAGS) -lblktap -ldb
+
+blkcow: $(LIB) blkcow.c blkcowlib.c
+	$(CC) $(CFLAGS) -o $@ blkcow.c blkcowlib.c $(TAP_LDFLAGS) -lblktap -ldb
+
+blkimg: $(LIB) blkimg.c blkimglib.c
+	$(CC) $(CFLAGS) -o $@ blkimg.c blkimglib.c $(TAP_LDFLAGS) -lblktap
+
+blkgnbd: $(LIB) blkgnbd.c blkgnbdlib.c
+	$(CC) $(CFLAGS) -o $@ blkgnbd.c blkgnbdlib.c libgnbd/libgnbd.a $(TAP_LDFLAGS) -lblktap
+
+blkcowgnbd: $(LIB) blkcowgnbd.c blkcowlib.c blkgnbdlib.c
+	$(CC) $(CFLAGS) -o $@ blkcowgnbd.c blkgnbdlib.c blkcowlib.c libgnbd/libgnbd.a $(TAP_LDFLAGS) -lblktap -ldb
+
+blkaio: $(LIB) blkaio.c blkaiolib.c
+	$(CC) $(CFLAGS) -o $@ blkaio.c blkaiolib.c $(TAP_LDFLAGS) -lblktap -laio -lpthread
+
+# Targets that name actions rather than files.
+.PHONY: clean install mk-symlinks rpm TAGS
+
+# Build a tags table covering the library sources and all headers.
+TAGS:
+	etags -t $(SRCS) *.h
+
+# Pull in the compiler-generated dependency files when they exist.
+-include $(DEPS)
--- /dev/null
+Block Tap User-level Interfaces
+Andrew Warfield
+andrew.warfield@cl.cam.ac.uk
+February 8, 2005
+
+NOTE #1: The blktap is _experimental_ code. It works for me. Your
+mileage may vary. Don't use it for anything important. Please. ;)
+
+NOTE #2: All of the interfaces here are likely to change. This is all
+early code, and I am checking it in because others want to play with
+it. If you use it for anything, please let me know!
+
+Overview:
+---------
+
+This directory contains a library and set of example applications for
+the block tap device. The block tap hooks into the split block device
+interfaces above Xen allowing them to be extended. This extension can
+be done in userspace with the help of a library.
+
+The tap can be installed either as an interposition domain in between
+a frontend and backend driver pair, or as a terminating backend, in
+which case it is responsible for serving all requests itself.
+
+There are two reasons that you might want to use the tap,
+corresponding to these configurations:
+
+ 1. To examine or modify a stream of block requests while they are
+ in-flight (e.g. to encrypt data, or add data-driven watchpoints)
+
+ 2. To prototype a new backend driver, serving requests from the tap
+ rather than passing them along to the XenLinux blkback driver.
+ (e.g. to forward block requests to a remote host)
+
+
+Interface:
+----------
+
+At the moment, the tap interface is similar in spirit to that of the
+Linux netfilter. Requests are messages from a client (frontend)
+domain to a disk (backend) domain. Responses are messages travelling
+back, acknowledging the completion of a request. The library allows
+chains of functions to be attached to these events. In addition,
+hooks may be attached to handle control messages, which signify things
+like connections from new domains.
+
+At present the control messages especially expose a lot of the
+underlying driver interfaces. This may change in the future in order
+to simplify writing hooks.
+
+Here are the public interfaces:
+
+These allow hook functions to be chained:
+
+ void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
+ void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
+ void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
+
+This allows a response to be injected, in the case where a request has
+been removed using BLKTAP_STOLEN.
+
+ void blktap_inject_response(blkif_response_t *);
+
+These let you add file descriptors and handlers to the main poll loop:
+
+ int blktap_attach_poll(int fd, short events, int (*func)(int));
+ void blktap_detach_poll(int fd);
+
+This starts the main poll loop:
+
+ int blktap_listen(void);
+
+Example:
+--------
+
+blkimg.c uses an image on the local file system to serve requests to
+a domain. Here's what it looks like:
+
+---[blkimg.c]---
+
+/* blkimg.c
+ *
+ * file-backed disk.
+ */
+
+#include "blktaplib.h"
+#include "blkimglib.h"
+
+
+int main(int argc, char *argv[])
+{
+ image_init();
+
+ blktap_register_ctrl_hook("image_control", image_control);
+ blktap_register_request_hook("image_request", image_request);
+ blktap_listen();
+
+ return 0;
+}
+
+----------------
+
+All of the real work is in blkimglib.c, but this illustrates the
+actual tap interface well enough. image_control() will be called with
+all control messages. image_request() handles requests. As it reads
+from an on-disk image file, no requests are ever passed on to a
+backend, and so there will be no responses to process -- so there is
+nothing registered as a response hook.
+
+Other examples:
+---------------
+
+Here is a list of other examples in the directory:
+
+Things that terminate a block request stream:
+
+ blkimg - Use an image file/device to serve requests
+ blkgnbd - Use a remote gnbd server to serve requests
+ blkaio - Use libaio... (DOES NOT WORK)
+
+Things that don't:
+
+ blkdump - Print in-flight requests.
+ blkcow - Really inefficient copy-on-write disks using libdb to store
+ writes.
+
+There are examples of plugging these things together, for instance
+blkcowgnbd is a read-only gnbd device with copy-on-write to a local
+file.
+
+TODO:
+-----
+
+- Make session tracking work. At the moment these generally just handle a
+ single front-end client at a time.
+
+- Integrate with Xend. Need to cleanly pass an image identifier in the connect
+ message.
+
+- Make an asynchronous file-io terminator. The libaio attempt is
+ tragically stalled because mapped foreign pages make pfn_valid fail
+ (they are VM_IO), and so cannot be passed to aio as targets. A
+ better solution may be to tear the disk interfaces out of the real
+ backend and expose them somehow.
+
+- Make CoW suck less.
+
+- Do something more along the lines of dynamic linking for the
+ plugins, so that they don't all need a new main().
--- /dev/null
+/* blkaio.c
+ *
+ * libaio-backed disk.
+ */
+
+#include "blktaplib.h"
+#include "blkaiolib.h"
+
+
+/*
+ * Entry point of the libaio-backed tap: initialise the aio library,
+ * attach its control and request hooks, then run the tap's poll loop.
+ */
+int main(int argc, char *argv[])
+{
+    /* The command line is not used. */
+    (void)argc;
+    (void)argv;
+
+    aio_init();
+
+    /* Only control and request hooks are attached; no response hook. */
+    blktap_register_ctrl_hook("aio_control", aio_control);
+    blktap_register_request_hook("aio_request", aio_request);
+
+    blktap_listen();
+
+    return 0;
+}
--- /dev/null
+/* blkaiolib.c
+ *
+ * file/device image-backed block device -- using linux libaio.
+ *
+ * (c) 2004 Andrew Warfield.
+ *
+ * Xend has been modified to use an amorfs:[fsid] disk tag.
+ * This will show up as device type (maj:240,min:0) = 61440.
+ *
+ * The fsid is placed in the sec_start field of the disk extent.
+ *
+ * NOTE: This doesn't work. Grrr.
+ */
+
+#define _GNU_SOURCE
+#define __USE_LARGEFILE64
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <db.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/poll.h>
+#include <unistd.h>
+#include <errno.h>
+#include <libaio.h>
+#include <pthread.h>
+#include <time.h>
+#include "blktaplib.h"
+
+//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
+#define TMP_IMAGE_FILE_NAME "fc3.image"
+
+#define MAX_DOMS 1024
+#define MAX_IMGNAME_LEN 255
+#define AMORFS_DEV 61440
+#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */
+#define MAX_SEGMENTS_PER_REQ 11
+#define SECTOR_SHIFT 9
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
+
+#if 1
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+#if 1
+#define ASSERT(_p) \
+ if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+char dbg_page[4096];
+
+/* Per-domain image state; one record is allocated when a domain connects. */
+typedef struct {
+ /* These need to turn into an array/rbtree for multi-disk support. */
+ int fd; /* image file descriptor; -1 until VBD_GROW opens the image */
+ u64 fsid; /* copied from the extent's sector_start on VBD_GROW */
+ char imgname[MAX_IMGNAME_LEN]; /* name of the backing image file */
+ blkif_vdev_t vdevice; /* virtual device id reported in PROBE replies */
+} image_t;
+
+/* Note on pending_reqs: I assume all reqs are queued before they start to
+ * get filled. so count of 0 is an unused record.
+ */
+typedef struct {
+ blkif_request_t req; /* copy of the request, kept until all segments complete */
+ int count; /* segments still outstanding; 0 marks the slot as unused */
+} pending_req_t;
+
+static pending_req_t pending_list[MAX_REQUESTS];
+image_t *images[MAX_DOMS];
+
+static io_context_t ctx;
+static struct iocb *iocb_free[MAX_AIO_REQS];
+static int iocb_free_count;
+
+/* ---[ Notification mechanism ]--------------------------------------- */
+
+enum {
+ READ = 0,
+ WRITE = 1
+};
+
+static int aio_notify[2];
+static volatile int aio_listening = 0;
+
+static struct io_event aio_events[MAX_AIO_REQS];
+static int aio_event_count = 0;
+
+/* this is commented out in libaio.h for some reason. */
+extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
+
+/*
+ * Notifier thread: while the poll loop has signalled that it is
+ * listening (aio_listening != 0), wait for completed aio events,
+ * collect them into aio_events[] and wake the poll loop by writing a
+ * token to the notification pipe.  Never returns.
+ *
+ * libaio reports failures as a negative return value rather than
+ * through errno, so that value is what gets logged.  The result is
+ * only inspected after io_getevents() has actually been called.
+ */
+static void *notifier_thread(void *arg)
+{
+    int ret;
+    int msg = 0x00feeb00;
+
+    printf("Notifier thread started.\n");
+    for (;;) {
+        if (!aio_listening) {
+            usleep(1000); /* Not ready to read. */
+            continue;
+        }
+
+        ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0);
+        if (ret > 0) {
+            aio_event_count = ret;
+            printf("[Notifying! (%d)]\n", aio_event_count);
+            /* Hand the event array over to aio_pollhook(). */
+            aio_listening = 0;
+            /* No fsync(): it does not apply to a pipe. */
+            if (write(aio_notify[WRITE], &msg, sizeof(msg)) !=
+                (ssize_t)sizeof(msg))
+                printf("[notify write error! %d]\n", errno);
+        } else {
+            printf("[io_queue_wait error! %d]\n", -ret);
+            usleep(1000);
+        }
+    }
+}
+
+/* -------------------------------------------------------------------- */
+
+/*
+ * Control-message hook for the aio tap.
+ *
+ * Handles three CMSG_BLKIF_BE subtypes:
+ *   CREATE   - allocate the image record for the connecting domain;
+ *   DESTROY  - close the image (if open) and free the record;
+ *   VBD_GROW - open the image file for an amorfs extent.
+ *
+ * Domain ids index the fixed-size images[] table, so ids outside
+ * [0, MAX_DOMS) are rejected before the table is touched.
+ *
+ * Always returns 0; problems are reported on stdout.
+ */
+int aio_control(control_msg_t *msg)
+{
+    domid_t domid;
+
+    if (msg->type != CMSG_BLKIF_BE)
+    {
+        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
+        return 0;
+    }
+
+    switch (msg->subtype)
+    {
+    case CMSG_BLKIF_BE_CREATE:
+    {
+        blkif_be_create_t *create;
+
+        if ( msg->length != sizeof(blkif_be_create_t) )
+            goto parse_error;
+        create = (blkif_be_create_t *)msg->msg;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
+               create->domid, create->blkif_handle);
+
+        domid = create->domid;
+        if (domid >= MAX_DOMS) {
+            printf("domain id out of range! %d\n", domid);
+            return 0;
+        }
+        if (images[domid] != NULL) {
+            printf("attempt to connect from an existing dom!\n");
+            return 0;
+        }
+
+        images[domid] = (image_t *)malloc(sizeof(image_t));
+        if (images[domid] == NULL) {
+            printf("error allocating image record.\n");
+            return 0;
+        }
+
+        images[domid]->fd   = -1;
+        images[domid]->fsid = 0;
+
+        printf("Image connected.\n");
+        break;
+    }
+
+    case CMSG_BLKIF_BE_DESTROY:
+    {
+        blkif_be_destroy_t *destroy;
+
+        if ( msg->length != sizeof(blkif_be_destroy_t) )
+            goto parse_error;
+        destroy = (blkif_be_destroy_t *)msg->msg;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
+               destroy->domid, destroy->blkif_handle);
+
+        domid = destroy->domid;
+        if (domid >= MAX_DOMS) {
+            printf("domain id out of range! %d\n", domid);
+            return 0;
+        }
+        if (images[domid] != NULL) {
+            if (images[domid]->fd != -1)
+                close( images[domid]->fd );
+            free( images[domid] );
+            images[domid] = NULL;
+        }
+        break;
+    }
+
+    case CMSG_BLKIF_BE_VBD_GROW:
+    {
+        blkif_be_vbd_grow_t *grow;
+
+        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
+            goto parse_error;
+        grow = (blkif_be_vbd_grow_t *)msg->msg;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
+               grow->domid, grow->blkif_handle, grow->vdevice);
+        printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
+               grow->extent.sector_start,
+               grow->extent.sector_length,
+               grow->extent.device);
+
+        domid = grow->domid;
+        if ((domid >= MAX_DOMS) || (images[domid] == NULL)) {
+            printf("VBD_GROW on unconnected domain!\n");
+            return 0;
+        }
+
+        if (grow->extent.device != AMORFS_DEV) {
+            printf("VBD_GROW on non-amorfs device!\n");
+            return 0;
+        }
+
+        /* A repeated VBD_GROW must not leak the descriptor opened earlier. */
+        if (images[domid]->fd != -1) {
+            close(images[domid]->fd);
+            images[domid]->fd = -1;
+        }
+
+        /* TODO: config support for arbitrary image files/modes. */
+        snprintf(images[domid]->imgname, sizeof(images[domid]->imgname),
+                 "%s", TMP_IMAGE_FILE_NAME);
+
+        images[domid]->fsid    = grow->extent.sector_start;
+        images[domid]->vdevice = grow->vdevice;
+        images[domid]->fd      = open(images[domid]->imgname,
+                                      O_RDWR | O_DIRECT | O_LARGEFILE);
+        if (images[domid]->fd < 0) {
+            printf("Couldn't open image file! %d\n", errno);
+            images[domid]->fd = -1;
+            return 0;
+        }
+
+        printf("Image file opened. (%s)\n", images[domid]->imgname);
+        break;
+    }
+    }
+    return 0;
+
+parse_error:
+    printf("Bad control message!\n");
+    return 0;
+}
+
+/*
+ * Queue one asynchronous transfer per segment of a READ or WRITE
+ * request and record the request in pending_list[] until the
+ * completions arrive in aio_pollhook().
+ *
+ * Returns 0 when the request has been queued, -1 when it was rejected
+ * or nothing could be submitted; in that case no state is left behind
+ * and the caller should answer with an error.
+ *
+ * NOTE(review): the sector of segment i is computed as
+ * sector_number + 8*i, i.e. every earlier segment is assumed to cover
+ * eight sectors -- confirm against the frontend.
+ */
+static int aio_queue_rw(int fd, blkif_request_t *req, int is_write)
+{
+    struct iocb *ioq[MAX_SEGMENTS_PER_REQ];
+    struct iocb *io;
+    unsigned long size;
+    u64 sector;
+    char *page;
+    int i, ret, idx;
+
+    idx = ID_TO_IDX(req->id);
+    if ((idx < 0) || (idx >= MAX_REQUESTS)) {
+        printf("Bad request index! (%d)\n", idx);
+        return -1;
+    }
+
+    /* ioq[] holds at most MAX_SEGMENTS_PER_REQ entries, and a request
+     * without segments would never produce a completion. */
+    if ((req->nr_segments < 1) || (req->nr_segments > MAX_SEGMENTS_PER_REQ)) {
+        printf("Bad segment count! (%d)\n", req->nr_segments);
+        return -1;
+    }
+
+    if (iocb_free_count < req->nr_segments) {
+        printf("Out of free iocbs!\n");
+        return -1;
+    }
+
+    ASSERT(pending_list[idx].count == 0);
+    memcpy(&pending_list[idx].req, req, sizeof(*req));
+    pending_list[idx].count = req->nr_segments;
+
+    for (i = 0; i < req->nr_segments; i++) {
+
+        sector = req->sector_number + (8*i);
+
+        size = blkif_last_sect (req->frame_and_sects[i]) -
+               blkif_first_sect(req->frame_and_sects[i]) + 1;
+
+        page  = (char *)MMAP_VADDR(idx, i);
+        page += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
+
+        if (is_write) {
+            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
+                    req->sector_number, sector,
+                    blkif_first_sect(req->frame_and_sects[i]),
+                    blkif_last_sect (req->frame_and_sects[i]),
+                    (long)(sector << SECTOR_SHIFT));
+        } else {
+            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
+                    "pos: %15lu dpage: %p\n",
+                    req->sector_number, sector,
+                    blkif_first_sect(req->frame_and_sects[i]),
+                    blkif_last_sect (req->frame_and_sects[i]),
+                    (long)(sector << SECTOR_SHIFT), page);
+        }
+
+        /* convert size and sector to byte offsets */
+        size   <<= SECTOR_SHIFT;
+        sector <<= SECTOR_SHIFT;
+
+        io = iocb_free[--iocb_free_count];
+        if (is_write)
+            io_prep_pwrite(io, fd, page, size, sector);
+        else
+            io_prep_pread(io, fd, page, size, sector);
+        /* The request index travels with the iocb as its cookie. */
+        io->data = (void *)(long)idx;
+        ioq[i] = io;
+    }
+
+    ret = io_submit(ctx, req->nr_segments, ioq);
+    if (ret < 0) {
+        /* libaio returns the negated error code; errno is not set. */
+        printf("BADNESS: io_submit error! (%d)\n", -ret);
+
+        /* Nothing was queued: recycle the iocbs and release the slot. */
+        for (i = 0; i < req->nr_segments; i++)
+            iocb_free[iocb_free_count++] = ioq[i];
+        pending_list[idx].count = 0;
+        return -1;
+    }
+    if (ret != req->nr_segments)
+        printf("BADNESS: io_submit queued only %d of %d segments!\n",
+               ret, req->nr_segments);
+
+    return 0;
+}
+
+/*
+ * Request hook for the aio tap.
+ *
+ * PROBE is answered immediately from fstat() of the image file.
+ * READ and WRITE are queued through libaio and taken out of the
+ * request stream (BLKTAP_STOLEN); their responses are injected by
+ * aio_pollhook() once every segment has completed.
+ *
+ * On any failure the request structure is rewritten in place as an
+ * error response and BLKTAP_RESPOND is returned.
+ */
+int aio_request(blkif_request_t *req)
+{
+    int fd;
+    int ret;
+    blkif_response_t *rsp;
+    domid_t dom = ID_TO_DOM(req->id);
+
+    /* dom indexes the fixed-size images[] table. */
+    if ((dom >= MAX_DOMS) || (images[dom] == NULL) || (images[dom]->fd == -1)) {
+        printf("Data request for unknown domain!!! %d\n", dom);
+        goto err;
+    }
+
+    fd = images[dom]->fd;
+
+    switch (req->operation)
+    {
+    case BLKIF_OP_PROBE:
+    {
+        struct stat st;
+        vdisk_t *img_info;
+
+        /* We expect one buffer only. */
+        if ( req->nr_segments != 1 )
+            goto err;
+
+        /* Make sure the buffer is page-sized. */
+        if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
+             (blkif_last_sect (req->frame_and_sects[0]) != 7) )
+            goto err;
+
+        /* loop for multiple images would start here. */
+
+        ret = fstat(fd, &st);
+        if (ret != 0) {
+            printf("Couldn't stat image in PROBE!\n");
+            goto err;
+        }
+
+        img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
+        img_info[0].device   = images[dom]->vdevice;
+        img_info[0].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
+        img_info[0].capacity = (st.st_size >> SECTOR_SHIFT);
+
+        if (img_info[0].capacity == 0)
+            img_info[0].capacity = ((u64)1 << 63); // xend does this too.
+
+        DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device,
+                img_info[0].capacity);
+
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_PROBE;
+        rsp->status = 1; /* number of disks */
+
+        return BLKTAP_RESPOND;
+    }
+
+    case BLKIF_OP_WRITE:
+        if (aio_queue_rw(fd, req, 1) != 0)
+            goto err;
+        return BLKTAP_STOLEN;
+
+    case BLKIF_OP_READ:
+        if (aio_queue_rw(fd, req, 0) != 0)
+            goto err;
+        return BLKTAP_STOLEN;
+    }
+
+    printf("Unknown block operation!\n");
+err:
+    rsp = (blkif_response_t *)req;
+    rsp->id = req->id;
+    rsp->operation = req->operation;
+    rsp->status = BLKIF_RSP_ERROR;
+    return BLKTAP_RESPOND;
+}
+
+
+/* Set when any segment of the pending request with the same index has
+ * failed; consulted and cleared when the response is built. */
+static int pending_failed[MAX_REQUESTS];
+
+/*
+ * Poll-loop handler for the notification pipe.
+ *
+ * Walks the events collected by the notifier thread, returns every
+ * iocb to the free list and, once the last segment of a request has
+ * completed, injects its response: BLKIF_RSP_ERROR if any segment
+ * failed, BLKIF_RSP_OKAY otherwise.  Finally drains the wake-up token
+ * from the pipe and lets the notifier thread resume.
+ *
+ * fd is supplied by the poll loop and is not used.  Always returns 0.
+ */
+int aio_pollhook(int fd)
+{
+    struct io_event *ep;
+    blkif_response_t *rsp;
+    int idx;
+
+    DPRINTF("aio_hook(): \n");
+
+    for (ep = aio_events; aio_event_count-- > 0; ep++) {
+        struct iocb *io = ep->obj;
+        idx = (int)(long)ep->data;
+
+        /* The iocb is finished whatever the cookie says; recycle it. */
+        iocb_free[iocb_free_count++] = io;
+
+        if ((idx < 0) || (idx > MAX_REQUESTS-1) ||
+            (pending_list[idx].count == 0)) {
+            printf("aio returned a bad cookie (%d)!\n", idx);
+            /* Keep going: the remaining events still need handling. */
+            continue;
+        }
+
+        if ((int)ep->res < 0) {
+            printf("aio request error! (%d,%d)\n",
+                   (int)ep->res, (int)ep->res2);
+            pending_failed[idx] = 1;
+        }
+
+        pending_list[idx].count--;
+
+        if (pending_list[idx].count == 0) {
+            /* The response is built in place over the stored request,
+             * so copy the request first. */
+            blkif_request_t tmp = pending_list[idx].req;
+            rsp = (blkif_response_t *)&pending_list[idx].req;
+            rsp->id = tmp.id;
+            rsp->operation = tmp.operation;
+            rsp->status = pending_failed[idx] ? BLKIF_RSP_ERROR
+                                              : BLKIF_RSP_OKAY;
+            pending_failed[idx] = 0;
+            blktap_inject_response(rsp);
+        }
+    }
+
+    printf("pollhook done!\n");
+
+    /* Consume the wake-up token written by the notifier thread. */
+    if (read(aio_notify[READ], &idx, sizeof(idx)) < 0)
+        printf("notify read error! (%d)\n", errno);
+    aio_listening = 1;
+
+    return 0;
+}
+
+/* the image library terminates the request stream. _resp is a noop. */
+/*
+ * Response hook placeholder: the response is left untouched and
+ * BLKTAP_PASS is returned.
+ */
+int aio_response(blkif_response_t *rsp)
+{
+    (void)rsp; /* deliberately unused */
+
+    return BLKTAP_PASS;
+}
+
+/*
+ * One-time initialisation of the aio tap: clear the per-domain and
+ * pending-request tables, create the aio context, fill the iocb free
+ * list, create the notification pipe, start the notifier thread and
+ * attach the pipe's read end to the tap poll loop.
+ *
+ * Any failure is fatal and terminates the process with a non-zero
+ * exit status.
+ */
+void aio_init(void)
+{
+    int i, rc;
+    pthread_t p;
+
+    for (i = 0; i < MAX_DOMS; i++)
+        images[i] = NULL;
+
+    for (i = 0; i < MAX_REQUESTS; i++)
+        pending_list[i].count = 0;
+
+    memset(&ctx, 0, sizeof(ctx));
+    rc = io_queue_init(MAX_AIO_REQS, &ctx);
+    if (rc != 0) {
+        printf("queue_init failed! (%d)\n", rc);
+        exit(1);
+    }
+
+    /* iocb_free_count is the number of usable entries, so after the
+     * loop it must equal MAX_AIO_REQS, not the last index. */
+    iocb_free_count = 0;
+    for (i = 0; i < MAX_AIO_REQS; i++) {
+        iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb));
+        if (iocb_free[i] == NULL) {
+            printf("error allocating iocb array\n");
+            exit(1);
+        }
+        iocb_free_count = i + 1;
+    }
+
+    rc = pipe(aio_notify);
+    if (rc != 0) {
+        printf("pipe failed! (%d)\n", errno);
+        exit(1);
+    }
+
+    /* pthread_create() returns the error number; it does not set errno. */
+    rc = pthread_create(&p, NULL, notifier_thread, NULL);
+    if (rc != 0) {
+        printf("pthread_create failed! (%d)\n", rc);
+        exit(1);
+    }
+
+    aio_listening = 1;
+
+    blktap_attach_poll(aio_notify[READ], POLLIN, aio_pollhook);
+}
+
--- /dev/null
+/* blkaiolib.h
+ *
+ * aio image-backed block device.
+ *
+ * (c) 2004 Andrew Warfield.
+ *
+ * Xend has been modified to use an amorfs:[fsid] disk tag.
+ * This will show up as device type (maj:240,min:0) = 61440.
+ *
+ * The fsid is placed in the sec_start field of the disk extent.
+ */
+
+/* Control-message hook: tracks domain create/destroy and VBD_GROW. */
+int aio_control(control_msg_t *msg);
+/* Request hook: answers PROBE directly, queues READ/WRITE via libaio. */
+int aio_request(blkif_request_t *req);
+int aio_response(blkif_response_t *rsp); /* noop */
+/* Must be called once before the hooks are registered. */
+void aio_init(void);
--- /dev/null
+/* blkcow.c
+ *
+ * copy on write a block device. in a really inefficient way.
+ *
+ * (c) 2004 Andrew Warfield.
+ *
+ * This uses whatever backend the tap is attached to as the read-only
+ * underlay -- for the moment.
+ *
+ * Xend has been modified to use an amorfs:[fsid] disk tag.
+ * This will show up as device type (maj:240,min:0) = 61440.
+ *
+ * The fsid is placed in the sec_start field of the disk extent,
+ * the cow plugin uses this to identify a unique overlay.
+ */
+
+#include "blktaplib.h"
+#include "blkcowlib.h"
+
+
+/*
+ * Entry point of the copy-on-write tap: initialise the cow library,
+ * attach its control, request and response hooks, then run the tap's
+ * poll loop.
+ */
+int main(int argc, char *argv[])
+{
+    /* The command line is not used. */
+    (void)argc;
+    (void)argv;
+
+    cow_init();
+
+    blktap_register_ctrl_hook("cow_control", cow_control);
+    blktap_register_request_hook("cow_request", cow_request);
+    blktap_register_response_hook("cow_response", cow_response);
+
+    blktap_listen();
+
+    return 0;
+}
--- /dev/null
+/* blkcowgnbd.c
+ *
+ * gnbd-backed cow.
+ */
+
+#include "blktaplib.h"
+#include "blkcowlib.h"
+#include "blkgnbdlib.h"
+
+
+/*
+ * Entry point of the gnbd-backed copy-on-write tap: initialise both
+ * libraries, attach the cow hooks ahead of the gnbd hooks on the
+ * control and request chains, attach the cow response hook, then run
+ * the tap's poll loop.
+ */
+int main(int argc, char *argv[])
+{
+    /* The command line is not used. */
+    (void)argc;
+    (void)argv;
+
+    cow_init();
+    gnbd_init();
+
+    /* Registration order is kept: cow first, then gnbd. */
+    blktap_register_ctrl_hook("cow_control", cow_control);
+    blktap_register_ctrl_hook("gnbd_control", gnbd_control);
+    blktap_register_request_hook("cow_request", cow_request);
+    blktap_register_request_hook("gnbd_request", gnbd_request);
+    blktap_register_response_hook("cow_response", cow_response);
+
+    blktap_listen();
+
+    return 0;
+}
--- /dev/null
+/* blkcowimg.c
+ *
+ * file-backed cow.
+ */
+
+#include "blktaplib.h"
+#include "blkcowlib.h"
+#include "blkimglib.h"
+
+
+/*
+ * Entry point of the file-backed copy-on-write tap: initialise both
+ * libraries, attach the cow hooks ahead of the image hooks on the
+ * control and request chains, attach the cow response hook, then run
+ * the tap's poll loop.
+ */
+int main(int argc, char *argv[])
+{
+    /* The command line is not used. */
+    (void)argc;
+    (void)argv;
+
+    cow_init();
+    image_init();
+
+    /* Registration order is kept: cow first, then image. */
+    blktap_register_ctrl_hook("cow_control", cow_control);
+    blktap_register_ctrl_hook("image_control", image_control);
+    blktap_register_request_hook("cow_request", cow_request);
+    blktap_register_request_hook("image_request", image_request);
+    blktap_register_response_hook("cow_response", cow_response);
+
+    blktap_listen();
+
+    return 0;
+}
--- /dev/null
+/* blkcowlib.c
+ *
+ * copy on write a block device. in a really inefficient way.
+ *
+ * (c) 2004 Andrew Warfield.
+ *
+ * This uses whatever backend the tap is attached to as the read-only
+ * underlay -- for the moment.
+ *
+ * Xend has been modified to use an amorfs:[fsid] disk tag.
+ * This will show up as device type (maj:240,min:0) = 61440.
+ *
+ * The fsid is placed in the sec_start field of the disk extent,
+ * the cow plugin uses this to identify a unique overlay.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <db.h>
+#include "blktaplib.h"
+
+#define MAX_DOMS 1024
+#define MAX_DBNAME_LEN 255
+#define AMORFS_DEV 61440
+#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */
+
+/* Debug tracing: change the 0 to 1 to route DPRINTF to printf. */
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+/* Berkeley db has different params for open() after 4.1 */
+#ifndef DB_VERSION_MAJOR
+# define DB_VERSION_MAJOR 1
+#endif /* DB_VERSION_MAJOR */
+#ifndef DB_VERSION_MINOR
+# define DB_VERSION_MINOR 0
+#endif /* DB_VERSION_MINOR */
+
+/* Per-domain copy-on-write overlay state. */
+typedef struct {
+ DB *db; /* overlay database handle; NULL until VBD_GROW opens it */
+ u64 fsid; /* copied from the extent's sector_start on VBD_GROW */
+ char dbname[MAX_DBNAME_LEN]; /* overlay file name: fsid as 20 digits plus ".db" */
+} cow_t;
+
+cow_t *cows[MAX_DOMS];
+blkif_request_t *reread_list[MAX_REQUESTS];
+
+/*
+ * Control-message hook for the copy-on-write tap.
+ *
+ * Handles three CMSG_BLKIF_BE subtypes:
+ *   CREATE   - allocate the cow record for the connecting domain;
+ *   DESTROY  - close the overlay database (if open) and free the record;
+ *   VBD_GROW - open (creating if needed) the overlay database named
+ *              after the extent's sector_start.
+ *
+ * Domain ids index the fixed-size cows[] table, so ids outside
+ * [0, MAX_DOMS) are rejected before the table is touched.  A database
+ * handle that fails to open is closed again rather than leaked.
+ *
+ * Always returns 0; problems are reported on stdout/stderr.
+ */
+int cow_control(control_msg_t *msg)
+{
+    domid_t domid;
+    DB *db;
+    int ret;
+
+    if (msg->type != CMSG_BLKIF_BE)
+    {
+        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
+        return 0;
+    }
+
+    switch (msg->subtype)
+    {
+    case CMSG_BLKIF_BE_CREATE:
+    {
+        blkif_be_create_t *create;
+
+        if ( msg->length != sizeof(blkif_be_create_t) )
+            goto parse_error;
+        create = (blkif_be_create_t *)msg->msg;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
+               create->domid, create->blkif_handle);
+
+        domid = create->domid;
+        if (domid >= MAX_DOMS) {
+            printf("domain id out of range! %d\n", domid);
+            return 0;
+        }
+        if (cows[domid] != NULL) {
+            printf("attempt to connect from an existing dom!\n");
+            return 0;
+        }
+
+        cows[domid] = (cow_t *)malloc(sizeof(cow_t));
+        if (cows[domid] == NULL) {
+            printf("error allocating cow.\n");
+            return 0;
+        }
+
+        cows[domid]->db   = NULL;
+        cows[domid]->fsid = 0;
+
+        printf("COW connected.\n");
+        break;
+    }
+
+    case CMSG_BLKIF_BE_DESTROY:
+    {
+        blkif_be_destroy_t *destroy;
+
+        if ( msg->length != sizeof(blkif_be_destroy_t) )
+            goto parse_error;
+        destroy = (blkif_be_destroy_t *)msg->msg;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
+               destroy->domid, destroy->blkif_handle);
+
+        domid = destroy->domid;
+        if (domid >= MAX_DOMS) {
+            printf("domain id out of range! %d\n", domid);
+            return 0;
+        }
+        if (cows[domid] != NULL) {
+            if (cows[domid]->db != NULL)
+                cows[domid]->db->close(cows[domid]->db, 0);
+            free(cows[domid]);
+            cows[domid] = NULL;
+        }
+        break;
+    }
+
+    case CMSG_BLKIF_BE_VBD_GROW:
+    {
+        blkif_be_vbd_grow_t *grow;
+
+        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
+            goto parse_error;
+        grow = (blkif_be_vbd_grow_t *)msg->msg;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
+               grow->domid, grow->blkif_handle, grow->vdevice);
+        printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
+               grow->extent.sector_start,
+               grow->extent.sector_length,
+               grow->extent.device);
+
+        domid = grow->domid;
+        if ((domid >= MAX_DOMS) || (cows[domid] == NULL)) {
+            printf("VBD_GROW on unconnected domain!\n");
+            return 0;
+        }
+
+        if (grow->extent.device != AMORFS_DEV) {
+            printf("VBD_GROW on non-amorfs device!\n");
+            return 0;
+        }
+
+        /* A repeated VBD_GROW must not leak the overlay opened earlier. */
+        if (cows[domid]->db != NULL) {
+            cows[domid]->db->close(cows[domid]->db, 0);
+            cows[domid]->db = NULL;
+        }
+
+        snprintf(cows[domid]->dbname, sizeof(cows[domid]->dbname),
+                 "%020llu.db", grow->extent.sector_start);
+
+        cows[domid]->fsid = grow->extent.sector_start;
+
+        if ((ret = db_create(&db, NULL, 0)) != 0) {
+            fprintf(stderr, "db_create: %s\n", db_strerror(ret));
+            return 0;
+        }
+
+        /* Berkeley db has different params for open() after 4.1 */
+#if DB_VERSION_MAJOR < 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 1)
+        ret = db->open(db, cows[domid]->dbname, NULL, DB_BTREE,
+                       DB_CREATE, 0664);
+#else /* DB_VERSION >= 4.1 */
+        ret = db->open(db, NULL, cows[domid]->dbname, NULL, DB_BTREE,
+                       DB_CREATE, 0664);
+#endif /* DB_VERSION < 4.1 */
+
+        if (ret != 0) {
+            db->err(db, ret, "%s", cows[domid]->dbname);
+            /* The handle is of no further use after a failed open. */
+            db->close(db, 0);
+            return 0;
+        }
+
+        cows[domid]->db = db;
+        printf("Overlay db opened. (%s)\n", cows[domid]->dbname);
+        break;
+    }
+    }
+    return 0;
+
+parse_error:
+    printf("Bad control message!\n");
+    return 0;
+}
+
+/*
+ * Remember a request that is being passed on to the backend so that
+ * cow_response() can find it again by request index.
+ *
+ * Returns 0 on success, -1 if the index is out of range, the slot is
+ * already occupied, or memory cannot be allocated.
+ */
+static int cow_stash_request(blkif_request_t *req)
+{
+    int idx = ID_TO_IDX(req->id);
+
+    if ((idx < 0) || (idx >= MAX_REQUESTS)) {
+        printf("Bad index!\n");
+        return -1;
+    }
+    if (reread_list[idx] != NULL) {
+        printf("Dupe index!\n");
+        return -1;
+    }
+
+    reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req));
+    if (reread_list[idx] == NULL) {
+        printf("error allocating reread record.\n");
+        return -1;
+    }
+    memcpy(reread_list[idx], req, sizeof(*req));
+    return 0;
+}
+
+/*
+ * Request hook for the copy-on-write tap.
+ *
+ * WRITE - every segment's page is stored in the overlay database,
+ *         keyed by its sector number, and the request is answered
+ *         here (BLKTAP_RESPOND).
+ * READ  - segments found in the overlay are copied into the request's
+ *         pages.  As soon as one segment is missing, the request is
+ *         stashed and passed on (BLKTAP_PASS); cow_response() then
+ *         re-applies the overlay on top of what the backend returned.
+ * PROBE - stashed for the debug print in cow_response() and passed on.
+ *
+ * On any failure the request structure is rewritten in place as an
+ * error response and BLKTAP_RESPOND is returned.
+ *
+ * NOTE(review): the key of segment i is sector_number + 8*i and a full
+ * page is stored per key, i.e. segments are assumed to be whole pages.
+ */
+int cow_request(blkif_request_t *req)
+{
+    DB *db;
+    DBT key, data;
+    u64 secno;
+    char *spage, *dpage;
+    int ret, i, idx;
+    blkif_response_t *rsp;
+    domid_t dom = ID_TO_DOM(req->id);
+
+    /* dom indexes the fixed-size cows[] table. */
+    if ((dom >= MAX_DOMS) || (cows[dom] == NULL) || (cows[dom]->db == NULL)) {
+        printf("Data request for unknown domain!!! %d\n", dom);
+        goto err;
+    }
+
+    db = cows[dom]->db;
+
+    switch (req->operation)
+    {
+    case BLKIF_OP_PROBE:
+        /* debug -- delete */
+        idx = ID_TO_IDX(req->id);
+        if ((idx >= 0) && (idx < MAX_REQUESTS)) {
+            /* Replace any stale record instead of leaking it. */
+            free(reread_list[idx]);
+            reread_list[idx] = NULL;
+        }
+        if (cow_stash_request(req) != 0)
+            goto err;
+        return BLKTAP_PASS;
+
+    case BLKIF_OP_WRITE:
+        for (i = 0; i < req->nr_segments; i++) {
+            memset(&key, 0, sizeof(key));
+            memset(&data, 0, sizeof(data));
+
+            secno = req->sector_number + (8*i);
+            key.data = &secno;
+            key.size = sizeof(secno);
+
+            spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
+            data.data = spage;
+            data.size = PAGE_SIZE;
+
+            DPRINTF("cWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
+                    req->sector_number, secno,
+                    blkif_first_sect(req->frame_and_sects[i]),
+                    blkif_last_sect (req->frame_and_sects[i]),
+                    (long)(secno << 9));
+
+            if ((ret = db->put(db, NULL, &key, &data, 0)) != 0) {
+                db->err(db, ret, "DB->put");
+                goto err;
+            }
+            DPRINTF("db: %lld: key stored.\n", *((u64 *)key.data));
+        }
+
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_WRITE;
+        rsp->status = BLKIF_RSP_OKAY;
+
+        return BLKTAP_RESPOND;
+
+    case BLKIF_OP_READ:
+        for (i = 0; i < req->nr_segments; i++) {
+            memset(&key, 0, sizeof(key));
+            memset(&data, 0, sizeof(data));
+
+            secno = req->sector_number + (8*i);
+            key.data = &secno;
+            key.size = sizeof(secno);
+
+            DPRINTF("cREAD: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
+                    req->sector_number, secno,
+                    blkif_first_sect(req->frame_and_sects[i]),
+                    blkif_last_sect (req->frame_and_sects[i]),
+                    (long)(secno << 9));
+
+            ret = db->get(db, NULL, &key, &data, 0);
+            if (ret == 0) {
+                DPRINTF("db: %llu: key retrieved (req).\n",
+                        *((u64 *)key.data));
+
+                /* A full page is copied out below; refuse short records. */
+                if (data.size < PAGE_SIZE) {
+                    printf("Short overlay record!\n");
+                    goto err;
+                }
+
+                dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
+                spage = data.data;
+                memcpy(dpage, spage, PAGE_SIZE);
+
+            } else if (ret == DB_NOTFOUND) {
+                /* Not in the overlay: let the backend read it. */
+                if (cow_stash_request(req) != 0)
+                    goto err;
+                return BLKTAP_PASS;
+            } else {
+                db->err(db, ret, "DB->get");
+                goto err;
+            }
+        }
+
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_READ;
+        rsp->status = BLKIF_RSP_OKAY;
+        return BLKTAP_RESPOND;
+    }
+
+    printf("Unknow block operation!\n");
+    return BLKTAP_PASS;
+err:
+    rsp = (blkif_response_t *)req;
+    rsp->id = req->id;
+    rsp->operation = req->operation;
+    rsp->status = BLKIF_RSP_ERROR;
+    return BLKTAP_RESPOND;
+}
+
+/*
+ * Response hook for the copy-on-write tap.
+ *
+ * For a READ that cow_request() passed on to the backend, every
+ * segment that exists in the overlay database is copied over the data
+ * the backend returned.  For a PROBE, the returned disk records are
+ * printed for debugging.
+ *
+ * Whatever the outcome, the request stashed under this response's
+ * index is released before returning, so the slot can be reused.
+ *
+ * Always returns BLKTAP_PASS.
+ */
+int cow_response(blkif_response_t *rsp)
+{
+    blkif_request_t *req;
+    int i, ret;
+    DB *db;
+    DBT key, data;
+    u64 secno;
+    char *spage, *dpage;
+    int idx = ID_TO_IDX(rsp->id);
+    domid_t dom;
+
+    /* idx indexes the fixed-size reread_list[] table. */
+    if ((idx < 0) || (idx >= MAX_REQUESTS)) {
+        printf("Bad index!\n");
+        return BLKTAP_PASS;
+    }
+
+    req = reread_list[idx];
+
+    /* don't touch erroring responses. */
+    if (rsp->status == BLKIF_RSP_ERROR)
+        goto done;
+
+    if ((rsp->operation == BLKIF_OP_READ) && (req != NULL))
+    {
+        dom = ID_TO_DOM(req->id);
+
+        if ((dom >= MAX_DOMS) || (cows[dom] == NULL) ||
+            (cows[dom]->db == NULL)) {
+            printf("Response from unknown domain!!! Very badness! %d\n", dom);
+            goto done;
+        }
+
+        db = cows[dom]->db;
+
+        for (i = 0; i < req->nr_segments; i++) {
+            memset(&key, 0, sizeof(key));
+            memset(&data, 0, sizeof(data));
+
+            secno = req->sector_number + (8*i);
+            key.data = &secno;
+            key.size = sizeof(secno);
+
+            ret = db->get(db, NULL, &key, &data, 0);
+            if (ret == DB_NOTFOUND)
+                continue; /* We read this from disk. */
+
+            if (ret != 0) {
+                db->err(db, ret, "DB->get");
+                goto done;
+            }
+
+            printf("db: %llu: key retrieved (rsp).\n",
+                   *((u64 *)key.data));
+
+            /* A full page is copied out below; refuse short records. */
+            if (data.size < PAGE_SIZE) {
+                printf("Short overlay record!\n");
+                goto done;
+            }
+
+            dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
+            spage = data.data;
+            memcpy(dpage, spage, PAGE_SIZE);
+        }
+    }
+
+    /* Without a stashed request there is no page to look at. */
+    if ((rsp->operation == BLKIF_OP_PROBE) && (req != NULL)) {
+
+        vdisk_t *img_info;
+
+        img_info = (vdisk_t *)(char *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
+        /* rsp->status holds the number of disk records returned. */
+        for (i = 0; i < rsp->status; i++)
+            printf("PROBE (%d) device: 0x%04x capacity: %llu, info: 0x%04x\n",
+                   i,
+                   img_info[i].device,
+                   img_info[i].capacity,
+                   img_info[i].info);
+    }
+
+done:
+    /* free(NULL) is harmless when nothing was stashed. */
+    free(reread_list[idx]);
+    reread_list[idx] = NULL;
+    return BLKTAP_PASS;
+}
+
+/*
+ * cow_init: reset the CoW tap's tables.
+ *
+ * Marks every per-domain slot in cows[] and every stashed-request slot in
+ * reread_list[] as empty (NULL).
+ */
+void cow_init(void)
+{
+    int i;
+
+    for (i = 0; i < MAX_DOMS; i++)
+        cows[i] = NULL;
+
+    /* Index with i: writing slot MAX_REQUESTS would be one past the end. */
+    for (i = 0; i < MAX_REQUESTS; i++)
+        reread_list[i] = NULL;
+}
+
--- /dev/null
+/* blkcowlib.h
+ *
+ * copy on write a block device. in a really inefficient way.
+ *
+ * (c) 2004 Andrew Warfield.
+ *
+ * public interfaces to the CoW tap.
+ *
+ */
+
+/* Control-message hook for the CoW tap.
+ * NOTE(review): implementation is not visible from this header -- confirm
+ * its return convention in blkcowlib.c. */
+int cow_control (control_msg_t *msg);
+/* Request hook; returns a BLKTAP_* verdict for the request. */
+int cow_request (blkif_request_t *req);
+/* Response hook; returns BLKTAP_PASS. */
+int cow_response (blkif_response_t *rsp);
+/* Resets the CoW tap's tables; takes no arguments and returns nothing. */
+void cow_init (void);
--- /dev/null
+/* blkdump.c
+ *
+ * show a running trace of block requests as they fly by.
+ *
+ * (c) 2004 Andrew Warfield.
+ */
+
+#include <stdio.h>
+#include "blktaplib.h"
+
+/*
+ * control_print: control-message hook that logs backend block-interface
+ * control messages to stdout.
+ *
+ * A message whose major type is not CMSG_BLKIF_BE is reported and ignored.
+ * A message whose subtype is not handled below, or whose length does not
+ * match the payload struct for its subtype, is reported as bad instead of
+ * being decoded.  Always returns 0.
+ */
+int control_print(control_msg_t *msg)
+{
+    if (msg->type != CMSG_BLKIF_BE)
+    {
+        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
+        return 0;
+    }
+
+    /* Each case returns once it has printed; leaving the switch with
+     * `break` means the message could not be decoded. */
+    switch(msg->subtype)
+    {
+    case CMSG_BLKIF_BE_CREATE:
+    {
+        blkif_be_create_t *create = (blkif_be_create_t *)msg->msg;
+
+        if ( msg->length != sizeof(*create) )
+            break;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
+               create->domid,
+               create->blkif_handle);
+        return 0;
+    }
+    case CMSG_BLKIF_BE_DESTROY:
+    {
+        blkif_be_destroy_t *destroy = (blkif_be_destroy_t *)msg->msg;
+
+        if ( msg->length != sizeof(*destroy) )
+            break;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
+               destroy->domid,
+               destroy->blkif_handle);
+        return 0;
+    }
+    case CMSG_BLKIF_BE_CONNECT:
+    {
+        blkif_be_connect_t *connect = (blkif_be_connect_t *)msg->msg;
+
+        if ( msg->length != sizeof(*connect) )
+            break;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CONNECT(d:%d,h:%d)\n",
+               connect->domid,
+               connect->blkif_handle);
+        return 0;
+    }
+    case CMSG_BLKIF_BE_DISCONNECT:
+    {
+        blkif_be_disconnect_t *disc = (blkif_be_disconnect_t *)msg->msg;
+
+        if ( msg->length != sizeof(*disc) )
+            break;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DISCONNECT(d:%d,h:%d)\n",
+               disc->domid,
+               disc->blkif_handle);
+        return 0;
+    }
+    case CMSG_BLKIF_BE_VBD_CREATE:
+    {
+        blkif_be_vbd_create_t *vbd = (blkif_be_vbd_create_t *)msg->msg;
+
+        if ( msg->length != sizeof(*vbd) )
+            break;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_CREATE(d:%d,h:%d,v:%d)\n",
+               vbd->domid,
+               vbd->blkif_handle,
+               vbd->vdevice);
+        return 0;
+    }
+    case CMSG_BLKIF_BE_VBD_DESTROY:
+    {
+        blkif_be_vbd_destroy_t *vbd = (blkif_be_vbd_destroy_t *)msg->msg;
+
+        if ( msg->length != sizeof(*vbd) )
+            break;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_DESTROY(d:%d,h:%d,v:%d)\n",
+               vbd->domid,
+               vbd->blkif_handle,
+               vbd->vdevice);
+        return 0;
+    }
+    case CMSG_BLKIF_BE_VBD_GROW:
+    {
+        blkif_be_vbd_grow_t *grow = (blkif_be_vbd_grow_t *)msg->msg;
+
+        if ( msg->length != sizeof(*grow) )
+            break;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
+               grow->domid,
+               grow->blkif_handle,
+               grow->vdevice);
+        printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
+               grow->extent.sector_start,
+               grow->extent.sector_length,
+               grow->extent.device);
+        return 0;
+    }
+    case CMSG_BLKIF_BE_VBD_SHRINK:
+    {
+        blkif_be_vbd_shrink_t *shrink = (blkif_be_vbd_shrink_t *)msg->msg;
+
+        if ( msg->length != sizeof(*shrink) )
+            break;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_SHRINK(d:%d,h:%d,v:%d)\n",
+               shrink->domid,
+               shrink->blkif_handle,
+               shrink->vdevice);
+        return 0;
+    }
+    default:
+        break;
+    }
+
+    printf("[CONTROL_MSG] Bad message type or length!\n");
+    return 0;
+}
+
+/*
+ * request_print: request hook that traces one block request to stdout.
+ *
+ * A PROBE is printed as a single line.  Any other operation is printed with
+ * its segment count, device and start sector, followed by one line per
+ * segment.  The request is not modified; always returns BLKTAP_PASS.
+ */
+int request_print(blkif_request_t *req)
+{
+    int seg;
+    unsigned long frame_sect;
+
+    /* Probes carry nothing worth dumping beyond the header line. */
+    if ( req->operation == BLKIF_OP_PROBE ) {
+        printf("[%2u:%2u<%s]\n", ID_TO_DOM(req->id), ID_TO_IDX(req->id),
+               blkif_op_name[req->operation]);
+        return BLKTAP_PASS;
+    }
+
+    printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n",
+           ID_TO_DOM(req->id), ID_TO_IDX(req->id),
+           blkif_op_name[req->operation],
+           req->nr_segments, req->device,
+           req->sector_number);
+
+    seg = 0;
+    while (seg < req->nr_segments) {
+        frame_sect = req->frame_and_sects[seg];
+        printf(" (pf: 0x%8lx start: %lu stop: %lu)\n",
+               (frame_sect & PAGE_MASK),
+               blkif_first_sect(frame_sect),
+               blkif_last_sect(frame_sect)
+               );
+        seg++;
+    }
+
+    return BLKTAP_PASS;
+}
+
+/*
+ * response_print: response hook that traces one block response to stdout.
+ *
+ * PROBE responses are printed without a status; every other operation is
+ * printed with its status.  Always returns BLKTAP_PASS.
+ */
+int response_print(blkif_response_t *rsp)
+{
+    if ( rsp->operation != BLKIF_OP_PROBE ) {
+        printf("[%2u:%2u>%5s] (status: %d)\n",
+               ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id),
+               blkif_op_name[rsp->operation],
+               rsp->status);
+        return BLKTAP_PASS;
+    }
+
+    printf("[%2u:%2u>%s]\n", ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id),
+           blkif_op_name[rsp->operation]);
+    return BLKTAP_PASS;
+}
+
+/* Entry point of the trace tool: install the control, request and response
+ * print hooks, then hand control to blktap_listen().  The command-line
+ * arguments are not used. */
+int main(int argc, char *argv[])
+{
+ blktap_register_ctrl_hook("control_print", control_print);
+ blktap_register_request_hook("request_print", request_print);
+ blktap_register_response_hook("response_print", response_print);
+ blktap_listen();
+
+ return 0;
+}
--- /dev/null
+/* blkgnbd.c
+ *
+ * gnbd-backed disk.
+ */
+
+#include "blktaplib.h"
+#include "blkgnbdlib.h"
+
+
+/* Entry point of the gnbd-backed disk: initialise the gnbd plugin state,
+ * install its control and request hooks, then hand control to
+ * blktap_listen().  No response hook is registered.  The command-line
+ * arguments are not used. */
+int main(int argc, char *argv[])
+{
+ gnbd_init();
+
+ blktap_register_ctrl_hook("gnbd_control", gnbd_control);
+ blktap_register_request_hook("gnbd_request", gnbd_request);
+ blktap_listen();
+
+ return 0;
+}
--- /dev/null
+/* blkgnbdlib.c
+ *
+ * gnbd image-backed block device.
+ *
+ * (c) 2004 Andrew Warfield.
+ *
+ * Xend has been modified to use an amorfs:[fsid] disk tag.
+ * This will show up as device type (maj:240,min:0) = 61440.
+ *
+ * The fsid is placed in the sec_start field of the disk extent.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <db.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/poll.h>
+#include "blktaplib.h"
+#include "libgnbd/libgnbd.h"
+
+#define GNBD_SERVER "skirmish.cl.cam.ac.uk"
+#define GNBD_CLIENT "pengi-0.xeno.cl.cam.ac.uk"
+#define GNBD_MOUNT "fc2_akw27"
+#define GNBD_PORT 0x38e7
+
+#define MAX_DOMS 1024
+#define MAX_IMGNAME_LEN 255
+#define AMORFS_DEV 61440
+#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */
+#define SECTOR_SHIFT 9
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+#if 1
+#define ASSERT(_p) \
+ if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define GH_DISCONNECTED 0
+#define GH_PROBEWAITING 1
+#define GH_CONNECTED 2
+
+/* Per-domain gnbd state; one record per slot of gnbds[]. */
+typedef struct {
+ /* These need to turn into an array/rbtree for multi-disk support. */
+ struct gnbd_handle *gh; /* handle returned by gnbd_setup(); NULL until then */
+ int gh_state; /* one of the GH_* values defined above */
+ int probe_idx; /* This really needs cleaning up after hotos. */
+ /* probe_idx: pending_list slot of a PROBE deferred while disconnected. */
+ int fd; /* descriptor from gnbd_fd(gh), watched via blktap_attach_poll() */
+ u64 fsid; /* taken from the VBD_GROW extent's sector_start */
+ char gnbdname[MAX_IMGNAME_LEN]; /* mount name passed to gnbd_setup() */
+ blkif_vdev_t vdevice; /* virtual device id reported back on PROBE */
+} gnbd_t;
+
+/* Note on pending_reqs: I assume all reqs are queued before they start to
+ * get filled. so count of 0 is an unused record.
+ */
+typedef struct {
+ blkif_request_t req; /* copy of the request taken when it was queued */
+ int count; /* outstanding sub-requests; 0 marks the slot as free */
+} pending_req_t;
+
+/* Per-domain records, indexed by domain id. */
+static gnbd_t *gnbds[MAX_DOMS];
+/* In-flight requests, indexed by the ring index extracted from the id. */
+static pending_req_t pending_list[MAX_REQUESTS];
+static int pending_count = 0; /* debugging */
+
+
+/*
+ * get_gnbd_by_fd: find the per-domain record whose gnbd descriptor is fd.
+ *
+ * Only records that currently hold a gnbd handle are considered: the fd
+ * field is assigned from gnbd_fd() after gnbd_setup() succeeds, so on a
+ * record without a handle it has no meaningful value and must not be
+ * compared.
+ *
+ * Returns the matching record, or NULL if there is none.
+ */
+gnbd_t *get_gnbd_by_fd(int fd)
+{
+    /* this is a linear scan for the moment. needs to be cleaned up for
+       multi-disk support. */
+
+    int i;
+
+    for (i = 0; i < MAX_DOMS; i++) {
+        gnbd_t *g = gnbds[i];
+
+        if ((g != NULL) && (g->gh != NULL) && (g->fd == fd))
+            return g;
+    }
+
+    return NULL;
+}
+
+int gnbd_pollhook(int fd);
+
+/*
+ * gnbd_control: control-message hook for the gnbd-backed disk.
+ *
+ *   CREATE    allocate the per-domain record (no gnbd connection yet).
+ *   DESTROY   stop polling the gnbd descriptor and release the record.
+ *   VBD_GROW  start the gnbd connection for the domain and register its
+ *             descriptor with the blktap poll loop.
+ *
+ * Other subtypes are ignored.  Domain ids outside gnbds[] are rejected, and
+ * a VBD_GROW for a domain that already has a handle is refused (there is no
+ * gnbd close call to replace the old handle cleanly).  Always returns 0.
+ */
+int gnbd_control(control_msg_t *msg)
+{
+    domid_t domid;
+
+    if (msg->type != CMSG_BLKIF_BE)
+    {
+        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
+        return 0;
+    }
+
+    switch(msg->subtype)
+    {
+    case CMSG_BLKIF_BE_CREATE:
+        if ( msg->length != sizeof(blkif_be_create_t) )
+            goto parse_error;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
+               ((blkif_be_create_t *)msg->msg)->domid,
+               ((blkif_be_create_t *)msg->msg)->blkif_handle);
+        domid = ((blkif_be_create_t *)msg->msg)->domid;
+        if (domid >= MAX_DOMS)
+            goto bad_domid;
+        if (gnbds[domid] != NULL) {
+            printf("attempt to connect from an existing dom!\n");
+            return 0;
+        }
+
+        gnbds[domid] = (gnbd_t *)malloc(sizeof(gnbd_t));
+        if (gnbds[domid] == NULL) {
+            printf("error allocating gnbd record.\n");
+            return 0;
+        }
+
+        /* Start from a fully defined record: no handle, no descriptor. */
+        memset(gnbds[domid], 0, sizeof(gnbd_t));
+        gnbds[domid]->gh       = NULL;
+        gnbds[domid]->gh_state = GH_DISCONNECTED;
+        gnbds[domid]->fd       = -1;
+        gnbds[domid]->fsid     = 0;
+
+        break;
+
+    case CMSG_BLKIF_BE_DESTROY:
+        if ( msg->length != sizeof(blkif_be_destroy_t) )
+            goto parse_error;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
+               ((blkif_be_destroy_t *)msg->msg)->domid,
+               ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
+
+        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
+        if (domid >= MAX_DOMS)
+            goto bad_domid;
+        if (gnbds[domid] != NULL) {
+            if (gnbds[domid]->gh != NULL) {
+                blktap_detach_poll(gnbds[domid]->fd);
+                free(gnbds[domid]->gh); /* XXX: Need a gnbd close call! */
+            }
+            free( gnbds[domid] );
+            gnbds[domid] = NULL;
+        }
+        break;
+
+    case CMSG_BLKIF_BE_VBD_GROW:
+    {
+        blkif_be_vbd_grow_t *grow;
+
+        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
+            goto parse_error;
+        grow = (blkif_be_vbd_grow_t *)msg->msg;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
+               grow->domid,
+               grow->blkif_handle,
+               grow->vdevice);
+        printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
+               grow->extent.sector_start,
+               grow->extent.sector_length,
+               grow->extent.device);
+        domid = grow->domid;
+        if (domid >= MAX_DOMS)
+            goto bad_domid;
+        if (gnbds[domid] == NULL) {
+            printf("VBD_GROW on unconnected domain!\n");
+            return 0;
+        }
+
+        if (grow->extent.device != AMORFS_DEV) {
+            printf("VBD_GROW on non-amorfs device!\n");
+            return 0;
+        }
+
+        /* Only one gnbd handle per domain is supported; overwriting it
+         * would leak the old handle and leave its descriptor polled. */
+        if (gnbds[domid]->gh != NULL) {
+            printf("VBD_GROW on already-connected domain!\n");
+            return 0;
+        }
+
+        /* TODO: config support for arbitrary gnbd files/modes. */
+        snprintf(gnbds[domid]->gnbdname, sizeof(gnbds[domid]->gnbdname),
+                 "%s", GNBD_MOUNT);
+
+        gnbds[domid]->fsid     = grow->extent.sector_start;
+        gnbds[domid]->vdevice  = grow->vdevice;
+        gnbds[domid]->gh_state = GH_DISCONNECTED;
+        gnbds[domid]->gh       = gnbd_setup(GNBD_SERVER, GNBD_PORT,
+                                            gnbds[domid]->gnbdname,
+                                            GNBD_CLIENT);
+        if (gnbds[domid]->gh == NULL) {
+            printf("Couldn't connect to gnbd mount!!\n");
+            return 0;
+        }
+        gnbds[domid]->fd = gnbd_fd(gnbds[domid]->gh);
+        blktap_attach_poll(gnbds[domid]->fd, POLLIN, gnbd_pollhook);
+
+        printf("gnbd mount connected. (%s)\n", gnbds[domid]->gnbdname);
+        break;
+    }
+    }
+    return 0;
+
+parse_error:
+    printf("Bad control message!\n");
+    return 0;
+
+bad_domid:
+    /* gnbds[] has MAX_DOMS slots; anything larger would index past it. */
+    printf("Control message for out-of-range domain! (%d)\n", domid);
+    return 0;
+}
+
+/*
+ * gnbd_blkif_probe: answer a PROBE request for one gnbd-backed disk.
+ *
+ * The request must carry exactly one segment covering sectors 0..7 of its
+ * page.  On success the page is filled with a single vdisk_t describing the
+ * disk and the request buffer is rewritten in place as a PROBE response with
+ * status 1 (the number of disks).  Otherwise the buffer is rewritten as an
+ * error response.  Returns BLKTAP_RESPOND in both cases.
+ */
+static int gnbd_blkif_probe(blkif_request_t *req, gnbd_t *gnbd)
+{
+    vdisk_t *disk;
+    /* The response is built in place over the request buffer. */
+    blkif_response_t *reply = (blkif_response_t *)req;
+    int well_formed;
+
+    /* We expect one buffer only, and it must be page-sized. */
+    well_formed = (req->nr_segments == 1) &&
+                  (blkif_first_sect(req->frame_and_sects[0]) == 0) &&
+                  (blkif_last_sect (req->frame_and_sects[0]) == 7);
+
+    if ( !well_formed ) {
+        reply->id = req->id;
+        reply->operation = req->operation;
+        reply->status = BLKIF_RSP_ERROR;
+        return BLKTAP_RESPOND;
+    }
+
+    /* loop for multiple gnbds would start here. */
+
+    disk = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
+    disk->device = gnbd->vdevice;
+    disk->info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
+    disk->capacity = gnbd_sectors(gnbd->gh);
+
+    printf("[SECTORS] %llu", disk->capacity);
+
+    DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", disk->device,
+            disk->capacity);
+
+    reply->id = req->id;
+    reply->operation = BLKIF_OP_PROBE;
+    reply->status = 1; /* number of disks */
+
+    return BLKTAP_RESPOND;
+}
+
+/*
+ * gnbd_submit_segments: hand every segment of a READ or WRITE request to the
+ * gnbd library, tagging each sub-request with idx as its cookie.
+ *
+ * Returns 0 if all segments were submitted, or the first non-zero result of
+ * gnbd_read()/gnbd_write().
+ */
+static int gnbd_submit_segments(struct gnbd_handle *gh, blkif_request_t *req,
+                                int idx)
+{
+    u64 sector;
+    unsigned long size;
+    char *page;
+    int i, ret;
+
+    for (i = 0; i < req->nr_segments; i++) {
+
+        sector = req->sector_number + (8*i);
+
+        size = blkif_last_sect (req->frame_and_sects[i]) -
+               blkif_first_sect(req->frame_and_sects[i]) + 1;
+
+        DPRINTF("%s: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
+                (req->operation == BLKIF_OP_WRITE) ? "iWRITE" : "iREAD ",
+                req->sector_number, sector,
+                blkif_first_sect(req->frame_and_sects[i]),
+                blkif_last_sect (req->frame_and_sects[i]),
+                (long)(sector << SECTOR_SHIFT));
+
+        page = (char *)MMAP_VADDR(idx, i);
+        page += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
+
+        if (req->operation == BLKIF_OP_WRITE)
+            ret = gnbd_write(gh, sector, size, page, (unsigned long)idx);
+        else
+            ret = gnbd_read(gh, sector, size, page, (unsigned long)idx);
+
+        if (ret)
+            return ret;
+    }
+
+    return 0;
+}
+
+/*
+ * gnbd_request: request hook for the gnbd-backed disk.
+ *
+ *   PROBE       answered immediately when connected; deferred (request
+ *               stolen) until login completes when still disconnected.
+ *   READ/WRITE  recorded in pending_list[] and submitted segment by segment
+ *               to gnbd; the response is injected later by gnbd_pollhook().
+ *
+ * Returns BLKTAP_STOLEN when the request has been taken over, or
+ * BLKTAP_RESPOND with the buffer rewritten as an error response when the
+ * domain, ring index, segment count or operation is invalid or a submit
+ * fails.
+ */
+int gnbd_request(blkif_request_t *req)
+{
+    struct gnbd_handle *gh;
+    int idx;
+    blkif_response_t *rsp;
+    domid_t dom = ID_TO_DOM(req->id);
+
+    if ((dom >= MAX_DOMS) || (gnbds[dom] == NULL) ||
+        (gnbds[dom]->gh == NULL)) {
+        printf("Data request for unknown domain!!! %d\n", dom);
+        goto err;
+    }
+
+    /* pending_list[] only has MAX_REQUESTS slots. */
+    idx = ID_TO_IDX(req->id);
+    if ((idx < 0) || (idx >= MAX_REQUESTS)) {
+        printf("Bad index!\n");
+        goto err;
+    }
+
+    gh = gnbds[dom]->gh;
+
+    switch (req->operation)
+    {
+    case BLKIF_OP_PROBE:
+    {
+        printf("PROBE!\n");
+        if ( gnbds[dom]->gh_state == GH_PROBEWAITING ) {
+            printf("Already have a PROBE outstanding!\n");
+            goto err;
+        }
+
+        if ( gnbds[dom]->gh_state == GH_DISCONNECTED )
+        {
+            /* need to defer until we are connected. */
+            printf("Deferring PROBE!\n");
+            memcpy(&pending_list[idx].req, req, sizeof(*req));
+            ASSERT(pending_list[idx].count == 0);
+            pending_list[idx].count = 1;
+
+            gnbds[dom]->probe_idx = idx;
+            gnbds[dom]->gh_state = GH_PROBEWAITING;
+
+            return BLKTAP_STOLEN;
+        }
+
+        return gnbd_blkif_probe(req, gnbds[dom]);
+    }
+    case BLKIF_OP_WRITE:
+    case BLKIF_OP_READ:
+    {
+        /* With no segments nothing would ever complete the request, and
+         * too many would run past frame_and_sects[]. */
+        if ((req->nr_segments == 0) ||
+            (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+            printf("Bad segment count!\n");
+            goto err;
+        }
+
+        ASSERT(pending_list[idx].count == 0);
+        memcpy(&pending_list[idx].req, req, sizeof(*req));
+        pending_list[idx].count = req->nr_segments;
+        pending_count++; /* dbg */
+
+        if (gnbd_submit_segments(gh, req, idx) != 0) {
+            if (req->operation == BLKIF_OP_WRITE)
+                printf("gnbd error on WRITE\n");
+            else
+                printf("gnbd error on READ\n");
+
+            /* We answer with an error right now, so release the slot.
+             * Completions for segments that were already submitted will
+             * find count == 0 and be discarded by the poll hook. */
+            pending_list[idx].count = 0;
+            pending_count--; /* dbg */
+            goto err;
+        }
+
+        return BLKTAP_STOLEN;
+    }
+    }
+
+    printf("Unknown block operation!\n");
+err:
+    rsp = (blkif_response_t *)req;
+    rsp->id = req->id;
+    rsp->operation = req->operation;
+    rsp->status = BLKIF_RSP_ERROR;
+    return BLKTAP_RESPOND;
+}
+
+/* the gnbd library terminates the request stream. _resp is a noop. */
+/* Response hook: rsp is not inspected; always returns BLKTAP_PASS. */
+int gnbd_response(blkif_response_t *rsp)
+{
+ return BLKTAP_PASS;
+}
+
+/*
+ * gnbd_pollhook: poll callback for a gnbd descriptor.
+ *
+ * Lets the gnbd library process whatever arrived on fd (gnbd_reply) and
+ * acts on the result:
+ *   GNBD_LOGIN_DONE    mark the device connected and, if a PROBE was
+ *                      deferred while disconnected, answer it now.
+ *   GNBD_REQUEST_DONE  one sub-request finished; when the last one of a
+ *                      block request completes, inject an OKAY response.
+ *
+ * Returns -1 if fd does not belong to any known device, 0 otherwise.
+ */
+int gnbd_pollhook(int fd)
+{
+    int err;
+    struct gnbd_handle *gh;
+    blkif_request_t *req;
+    blkif_response_t *rsp;
+    unsigned long idx;
+
+    gnbd_t *gnbd = get_gnbd_by_fd(fd);
+
+    if (gnbd == NULL) {
+        printf("GNBD badness: got poll hook on unknown device. (%d)\n", fd);
+        return -1;
+    }
+    gh = gnbd->gh;
+    err = gnbd_reply(gh);
+    switch (err) {
+    case GNBD_LOGIN_DONE:
+        if (gnbd->gh_state == GH_PROBEWAITING) {
+            if ((gnbd->probe_idx < 0) || (gnbd->probe_idx >= MAX_REQUESTS)) {
+                printf("gnbd deferred PROBE has a bad index (%d)!\n",
+                       gnbd->probe_idx);
+            } else {
+                req = (blkif_request_t *)&pending_list[gnbd->probe_idx].req;
+                printf("[!] Sending deferred PROBE!\n");
+                /* Rewrites req in place as the PROBE response. */
+                gnbd_blkif_probe(req, gnbd);
+                pending_list[gnbd->probe_idx].count = 0;
+                rsp = (blkif_response_t *)req;
+                blktap_inject_response(rsp);
+            }
+        }
+        gnbd->gh_state = GH_CONNECTED;
+        printf("GNBD_LOGIN_DONE (%d)\n", fd);
+        break;
+
+    case GNBD_REQUEST_DONE: /* switch to idx */
+        idx = gnbd_finished_request(gh);
+        /* Validate the cookie before touching pending_list[]. */
+        if ((idx >= MAX_REQUESTS) || (pending_list[idx].count == 0)) {
+            printf("gnbd returned a bad cookie (%lu)!\n", idx);
+            break;
+        }
+        req = (blkif_request_t *)&pending_list[idx].req;
+
+        pending_list[idx].count--;
+
+        if (pending_list[idx].count == 0) {
+            /* The response overlays the request, so snapshot it first. */
+            blkif_request_t tmp = *req;
+            pending_count--; /* dbg */
+            rsp = (blkif_response_t *)req;
+            rsp->id = tmp.id;
+            rsp->operation = tmp.operation;
+            rsp->status = BLKIF_RSP_OKAY;
+            blktap_inject_response(rsp);
+        }
+        break;
+
+    case GNBD_CONTINUE:
+        break;
+
+    case 0:
+        break;
+
+    default:
+        printf("gnbd_reply error (%d)\n", err);
+        break;
+    }
+    return 0;
+}
+
+/*
+ * gnbd_init: put the plugin's tables into their empty state -- no domain
+ * has a record and no pending slot is in use -- then announce readiness.
+ */
+void gnbd_init(void)
+{
+    int dom, slot;
+
+    dom = 0;
+    while (dom < MAX_DOMS)
+        gnbds[dom++] = NULL;
+
+    slot = 0;
+    while (slot < MAX_REQUESTS) {
+        pending_list[slot].count = 0;
+        slot++;
+    }
+
+    printf("GNBD image plugin initialized\n");
+}
+
--- /dev/null
+/* blkgnbdlib.h
+ *
+ * gnbd image-backed block device.
+ *
+ * (c) 2004 Andrew Warfield.
+ *
+ * Xend has been modified to use an amorfs:[fsid] disk tag.
+ * This will show up as device type (maj:240,min:0) = 61440.
+ *
+ * The fsid is placed in the sec_start field of the disk extent.
+ */
+
+/* Control-message hook: handles CREATE, DESTROY and VBD_GROW; returns 0. */
+int gnbd_control(control_msg_t *msg);
+/* Request hook: returns BLKTAP_STOLEN or BLKTAP_RESPOND. */
+int gnbd_request(blkif_request_t *req);
+/* Response hook: always returns BLKTAP_PASS. */
+int gnbd_response(blkif_response_t *rsp); /* noop */
+/* Clears the plugin's tables; call before registering the hooks. */
+void gnbd_init(void);
--- /dev/null
+/* blkimg.c
+ *
+ * file-backed disk.
+ */
+
+#include "blktaplib.h"
+#include "blkimglib.h"
+
+
+/* Entry point of the file-backed disk: initialise the image plugin state,
+ * install its control and request hooks, then hand control to
+ * blktap_listen().  No response hook is registered.  The command-line
+ * arguments are not used. */
+int main(int argc, char *argv[])
+{
+ image_init();
+
+ blktap_register_ctrl_hook("image_control", image_control);
+ blktap_register_request_hook("image_request", image_request);
+ blktap_listen();
+
+ return 0;
+}
--- /dev/null
+/* blkimglib.c
+ *
+ * file image-backed block device.
+ *
+ * (c) 2004 Andrew Warfield.
+ *
+ * Xend has been modified to use an amorfs:[fsid] disk tag.
+ * This will show up as device type (maj:240,min:0) = 61440.
+ *
+ * The fsid is placed in the sec_start field of the disk extent.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <db.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+#include "blktaplib.h"
+
+//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
+#define TMP_IMAGE_FILE_NAME "fc3.image"
+
+#define MAX_DOMS 1024
+#define MAX_IMGNAME_LEN 255
+#define AMORFS_DEV 61440
+#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */
+#define SECTOR_SHIFT 9
+
+/* Debug tracing: DPRINTF expands to nothing unless the `#if 0` below is
+ * flipped to `#if 1`, in which case it forwards to printf. */
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+
+/* Per-domain image state; one record per slot of images[]. */
+typedef struct {
+ /* These need to turn into an array/rbtree for multi-disk support. */
+ FILE *img; /* stream on the backing file; NULL until VBD_GROW opens it */
+ u64 fsid; /* taken from the VBD_GROW extent's sector_start */
+ char imgname[MAX_IMGNAME_LEN]; /* name of the backing file */
+ blkif_vdev_t vdevice; /* virtual device id reported back on PROBE */
+} image_t;
+
+/* Per-domain records, indexed by domain id. */
+image_t *images[MAX_DOMS];
+/* NOTE(review): reread_list is not referenced by any code visible in this
+ * file, and both arrays have external linkage -- confirm whether they are
+ * meant to be shared with other objects or should be static. */
+blkif_request_t *reread_list[MAX_REQUESTS];
+
+/*
+ * image_control: control-message hook for the file-backed disk.
+ *
+ *   CREATE    allocate the per-domain record (no file opened yet).
+ *   DESTROY   close the backing file, if open, and release the record.
+ *   VBD_GROW  open the backing image file for the domain.
+ *
+ * Other subtypes are ignored.  Domain ids outside images[] are rejected.
+ * Always returns 0.
+ */
+int image_control(control_msg_t *msg)
+{
+    domid_t domid;
+
+    if (msg->type != CMSG_BLKIF_BE)
+    {
+        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
+        return 0;
+    }
+
+    switch(msg->subtype)
+    {
+    case CMSG_BLKIF_BE_CREATE:
+        if ( msg->length != sizeof(blkif_be_create_t) )
+            goto parse_error;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
+               ((blkif_be_create_t *)msg->msg)->domid,
+               ((blkif_be_create_t *)msg->msg)->blkif_handle);
+        domid = ((blkif_be_create_t *)msg->msg)->domid;
+        if (domid >= MAX_DOMS)
+            goto bad_domid;
+        if (images[domid] != NULL) {
+            printf("attempt to connect from an existing dom!\n");
+            return 0;
+        }
+
+        images[domid] = (image_t *)malloc(sizeof(image_t));
+        if (images[domid] == NULL) {
+            printf("error allocating image record.\n");
+            return 0;
+        }
+
+        /* Start from a fully defined record with no file attached. */
+        memset(images[domid], 0, sizeof(image_t));
+        images[domid]->img = NULL;
+        images[domid]->fsid = 0;
+
+        printf("Image connected.\n");
+        break;
+
+    case CMSG_BLKIF_BE_DESTROY:
+        if ( msg->length != sizeof(blkif_be_destroy_t) )
+            goto parse_error;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
+               ((blkif_be_destroy_t *)msg->msg)->domid,
+               ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
+
+        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
+        if (domid >= MAX_DOMS)
+            goto bad_domid;
+        if (images[domid] != NULL) {
+            if (images[domid]->img != NULL)
+                fclose( images[domid]->img );
+            free( images[domid] );
+            images[domid] = NULL;
+        }
+        break;
+
+    case CMSG_BLKIF_BE_VBD_GROW:
+    {
+        blkif_be_vbd_grow_t *grow;
+
+        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
+            goto parse_error;
+        grow = (blkif_be_vbd_grow_t *)msg->msg;
+        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
+               grow->domid,
+               grow->blkif_handle,
+               grow->vdevice);
+        printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
+               grow->extent.sector_start,
+               grow->extent.sector_length,
+               grow->extent.device);
+        domid = grow->domid;
+        if (domid >= MAX_DOMS)
+            goto bad_domid;
+        if (images[domid] == NULL) {
+            printf("VBD_GROW on unconnected domain!\n");
+            return 0;
+        }
+
+        if (grow->extent.device != AMORFS_DEV) {
+            printf("VBD_GROW on non-amorfs device!\n");
+            return 0;
+        }
+
+        /* A repeated VBD_GROW must not leak the stream opened earlier. */
+        if (images[domid]->img != NULL) {
+            fclose( images[domid]->img );
+            images[domid]->img = NULL;
+        }
+
+        /* TODO: config support for arbitrary image files/modes. */
+        snprintf(images[domid]->imgname, sizeof(images[domid]->imgname),
+                 "%s", TMP_IMAGE_FILE_NAME);
+
+        images[domid]->fsid = grow->extent.sector_start;
+        images[domid]->vdevice = grow->vdevice;
+        images[domid]->img = fopen64(TMP_IMAGE_FILE_NAME, "r+");
+        if (images[domid]->img == NULL) {
+            printf("Couldn't open image file!\n");
+            return 0;
+        }
+
+        printf("Image file opened. (%s)\n", images[domid]->imgname);
+        break;
+    }
+    }
+    return 0;
+
+parse_error:
+    printf("Bad control message!\n");
+    return 0;
+
+bad_domid:
+    /* images[] has MAX_DOMS slots; anything larger would index past it. */
+    printf("Control message for out-of-range domain! (%d)\n", domid);
+    return 0;
+}
+
+/*
+ * image_request: request hook for the file-backed disk.
+ *
+ *   PROBE  fill the request's page with one vdisk_t describing the image
+ *          (capacity taken from the file size) and answer with status 1.
+ *   WRITE  copy each segment from the mapped request pages into the file.
+ *   READ   copy each segment from the file into the mapped request pages.
+ *
+ * The request buffer is rewritten in place as the response.  Returns
+ * BLKTAP_RESPOND in every case; the status is BLKIF_RSP_ERROR when the
+ * domain or segment count is invalid, the operation is unknown, or file
+ * I/O fails.
+ */
+int image_request(blkif_request_t *req)
+{
+    FILE *img;
+    u64 sector;
+    char *spage, *dpage;
+    int ret, i;
+    blkif_response_t *rsp;
+    domid_t dom = ID_TO_DOM(req->id);
+
+    if ((dom >= MAX_DOMS) || (images[dom] == NULL) ||
+        (images[dom]->img == NULL)) {
+        printf("Data request for unknown domain!!! %d\n", dom);
+        goto err;
+    }
+
+    img = images[dom]->img;
+
+    switch (req->operation)
+    {
+    case BLKIF_OP_PROBE:
+    {
+        int fd;
+        struct stat64 st;   /* 64-bit variant, matching fopen64() */
+        vdisk_t *img_info;
+
+        /* We expect one buffer only. */
+        if ( req->nr_segments != 1 )
+            goto err;
+
+        /* Make sure the buffer is page-sized. */
+        if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
+             (blkif_last_sect (req->frame_and_sects[0]) != 7) )
+            goto err;
+
+        /* loop for multiple images would start here. */
+
+        fd = fileno(img);
+        if (fd == -1) {
+            printf("Couldn't get image fd in PROBE!\n");
+            goto err;
+        }
+
+        ret = fstat64(fd, &st);
+        if (ret != 0) {
+            printf("Couldn't stat image in PROBE!\n");
+            goto err;
+        }
+
+        img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
+        img_info[0].device = images[dom]->vdevice;
+        img_info[0].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
+        img_info[0].capacity = (st.st_size >> SECTOR_SHIFT);
+
+        if (img_info[0].capacity == 0)
+            img_info[0].capacity = ((u64)1 << 63); // xend does this too.
+
+        DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device,
+                img_info[0].capacity);
+
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_PROBE;
+        rsp->status = 1; /* number of disks */
+
+        return BLKTAP_RESPOND;
+    }
+    case BLKIF_OP_WRITE:
+    {
+        unsigned long size;
+
+        /* More segments than this would run past frame_and_sects[]. */
+        if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
+            goto err;
+
+        for (i = 0; i < req->nr_segments; i++) {
+
+            sector = req->sector_number + (8*i);
+
+            size = blkif_last_sect (req->frame_and_sects[i]) -
+                   blkif_first_sect(req->frame_and_sects[i]) + 1;
+
+            /* off64_t, not off_t: a 32-bit off_t would truncate the
+             * byte offset before fseeko64() ever saw it. */
+            ret = fseeko64(img, (off64_t)(sector << SECTOR_SHIFT), SEEK_SET);
+            if (ret != 0) {
+                printf("fseek error on WRITE\n");
+                goto err;
+            }
+
+            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
+                    req->sector_number, sector,
+                    blkif_first_sect(req->frame_and_sects[i]),
+                    blkif_last_sect (req->frame_and_sects[i]),
+                    (long)(sector << SECTOR_SHIFT));
+
+            spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
+            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
+            ret = fwrite(spage, size << SECTOR_SHIFT, 1, img);
+            if (ret != 1) {
+                printf("fwrite error on WRITE (%d)\n", errno);
+                goto err;
+            }
+        }
+
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_WRITE;
+        rsp->status = BLKIF_RSP_OKAY;
+
+        return BLKTAP_RESPOND;
+    }
+    case BLKIF_OP_READ:
+    {
+        unsigned long size;
+
+        /* More segments than this would run past frame_and_sects[]. */
+        if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
+            goto err;
+
+        for (i = 0; i < req->nr_segments; i++) {
+
+            sector = req->sector_number + (8*i);
+
+            size = blkif_last_sect (req->frame_and_sects[i]) -
+                   blkif_first_sect(req->frame_and_sects[i]) + 1;
+
+            /* See WRITE: the offset must stay 64 bits wide. */
+            ret = fseeko64(img, (off64_t)(sector << SECTOR_SHIFT), SEEK_SET);
+            if (ret != 0) {
+                printf("fseek error on READ\n");
+                goto err;
+            }
+
+            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
+                    req->sector_number, sector,
+                    blkif_first_sect(req->frame_and_sects[i]),
+                    blkif_last_sect (req->frame_and_sects[i]),
+                    (long)(sector << SECTOR_SHIFT));
+
+            dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
+            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
+            ret = fread(dpage, size << SECTOR_SHIFT, 1, img);
+            if (ret != 1) {
+                printf("fread error on READ\n");
+                goto err;
+            }
+        }
+
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_READ;
+        rsp->status = BLKIF_RSP_OKAY;
+        return BLKTAP_RESPOND;
+    }
+    }
+
+    printf("Unknow block operation!\n");
+err:
+    /* The response is built in place over the request buffer. */
+    rsp = (blkif_response_t *)req;
+    rsp->id = req->id;
+    rsp->operation = req->operation;
+    rsp->status = BLKIF_RSP_ERROR;
+    return BLKTAP_RESPOND;
+}
+
+/* the image library terminates the request stream. _resp is a noop. */
+/* Response hook: rsp is not inspected; always returns BLKTAP_PASS. */
+int image_response(blkif_response_t *rsp)
+{
+ return BLKTAP_PASS;
+}
+
+/* image_init: mark every per-domain slot in images[] as empty (NULL). */
+void image_init(void)
+{
+    int dom = 0;
+
+    while (dom < MAX_DOMS)
+        images[dom++] = NULL;
+}
+
--- /dev/null
+/* blkimglib.h
+ *
+ * file image-backed block device.
+ *
+ * (c) 2004 Andrew Warfield.
+ *
+ * Xend has been modified to use an amorfs:[fsid] disk tag.
+ * This will show up as device type (maj:240,min:0) = 61440.
+ *
+ * The fsid is placed in the sec_start field of the disk extent.
+ */
+
+/* Control-message hook: handles CREATE, DESTROY and VBD_GROW; returns 0. */
+int image_control(control_msg_t *msg);
+/* Request hook: answers PROBE/READ/WRITE; returns BLKTAP_RESPOND. */
+int image_request(blkif_request_t *req);
+/* Response hook: always returns BLKTAP_PASS. */
+int image_response(blkif_response_t *rsp); /* noop */
+/* Clears the per-domain image table; call before registering the hooks. */
+void image_init(void);
--- /dev/null
+/*
+ * blkint.h
+ *
+ * Interfaces for the Xen block interposition driver.
+ *
+ * (c) 2004, Andrew Warfield, University of Cambridge
+ *
+ */
+
+#ifndef __BLKINT_H__
+
+//#include "blkif.h"
+
+
+/* Everything down to the matching #endif is compiled out. */
+#if 0
+/* Types of ring. */
+#define BLKIF_REQ_RING_TYPE 1
+#define BLKIF_RSP_RING_TYPE 2
+
+/* generic ring struct. */
+typedef struct blkif_generic_ring_struct {
+ int type;
+} blkif_generic_ring_t;
+
+/* A requestor's view of a ring. */
+typedef struct blkif_req_ring_struct {
+
+ int type; /* Will be BLKIF_REQ_RING_TYPE */
+ BLKIF_RING_IDX req_prod; /* PRIVATE req_prod index */
+ BLKIF_RING_IDX rsp_cons; /* Response consumer index */
+ blkif_ring_t *ring; /* Pointer to shared ring struct */
+
+} blkif_req_ring_t;
+
+#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 }
+
+/* A responder's view of a ring. */
+typedef struct blkif_rsp_ring_struct {
+
+ int type; /* Will be BLKIF_RSP_RING_TYPE */
+ BLKIF_RING_IDX rsp_prod; /* PRIVATE rsp_prod index */
+ BLKIF_RING_IDX req_cons; /* Request consumer index */
+ blkif_ring_t *ring; /* Pointer to shared ring struct */
+
+} blkif_rsp_ring_t;
+
+#define BLKIF_RSP_RING_INIT { BLKIF_RSP_RING_TYPE, 0, 0, 0 }
+
+/* View any of the ring structs above through its leading `type` field. */
+#define RING(a) (blkif_generic_ring_t *)(a)
+inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring);
+#endif
+
+/* -------[ interposition -> character device interface ]------------- */
+
+/* /dev/xen/blktap resides at device number major=10, minor=202 */
+#define BLKTAP_MINOR 202
+
+/* size of the extra VMA area to map in attached pages. */
+/* (BLKIF_RING_SIZE is not defined in this header.) */
+#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
+
+/* blktap IOCTLs: */
+#define BLKTAP_IOCTL_KICK_FE 1
+#define BLKTAP_IOCTL_KICK_BE 2
+/* SETMODE takes one of the BLKTAP_MODE_* values defined below. */
+#define BLKTAP_IOCTL_SETMODE 3
+#define BLKTAP_IOCTL_PRINT_IDXS 100
+
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
+#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
+#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
+#define BLKTAP_MODE_INTERCEPT_BE 0x00000002
+#define BLKTAP_MODE_COPY_FE 0x00000004
+#define BLKTAP_MODE_COPY_BE 0x00000008
+#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010
+#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020
+
+#define BLKTAP_MODE_INTERPOSE \
+ (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
+
+#define BLKTAP_MODE_COPY_BOTH \
+ (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
+
+#define BLKTAP_MODE_COPY_BOTH_PAGES \
+ (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
+
+/*
+ * BLKTAP_MODE_VALID: return 1 if arg is an accepted switching mode, else 0.
+ *
+ * Accepted values are passthrough, either intercept flag alone, both
+ * intercept flags together, or one of the three copy modes (FE, BE, both)
+ * with any subset of that mode's own *_PAGES flags set.
+ */
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
+{
+    /* arg with the optional page-copy flag(s) of each copy mode masked. */
+    unsigned long sans_fe_pages   = arg & ~BLKTAP_MODE_COPY_FE_PAGES;
+    unsigned long sans_be_pages   = arg & ~BLKTAP_MODE_COPY_BE_PAGES;
+    unsigned long sans_both_pages = arg & ~BLKTAP_MODE_COPY_BOTH_PAGES;
+
+    switch (arg) {
+    case BLKTAP_MODE_PASSTHROUGH:
+    case BLKTAP_MODE_INTERCEPT_FE:
+    case BLKTAP_MODE_INTERCEPT_BE:
+    case BLKTAP_MODE_INTERPOSE:
+        return 1;
+    default:
+        break;
+    }
+
+    if ( sans_fe_pages == BLKTAP_MODE_COPY_FE )
+        return 1;
+    if ( sans_be_pages == BLKTAP_MODE_COPY_BE )
+        return 1;
+    if ( sans_both_pages == BLKTAP_MODE_COPY_BOTH )
+        return 1;
+
+    return 0;
+}
+
+
+
+
+
+
+
+#define __BLKINT_H__
+#endif
--- /dev/null
+/*
+ * blktaplib.c
+ *
+ * userspace interface routines for the blktap driver.
+ *
+ * (c) 2004 Andrew Warfield.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+#include <err.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <linux/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/poll.h>
+#include <sys/ioctl.h>
+#include <string.h>
+#include <unistd.h>
+
+
+#define __COMPILING_BLKTAP_LIB
+#include "blktaplib.h"
+
+/* Debug tracing is enabled in this file: DPRINTF forwards to printf.
+ * Flip the `#if 1` to `#if 0` to compile the calls out. */
+#if 1
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+#define DEBUG_RING_IDXS 1
+
+/* NOTE(review): <sys/poll.h> is included above and may already provide
+ * POLLRDNORM -- confirm this local definition is needed and matches. */
+#define POLLRDNORM 0x040
+
+#define BLKTAP_IOCTL_KICK 1
+
+// this is in the header now
+//DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t);
+
+/* Signal handlers, defined outside the visible part of this file. */
+void got_sig_bus();
+void got_sig_int();
+
+
+/* in kernel these are opposite, but we are a consumer now. */
+blkif_back_ring_t fe_ring; /* slightly counterintuitive ;) */
+blkif_front_ring_t be_ring;
+ctrl_back_ring_t ctrl_ring;
+
+
+
+/* NOTE(review): presumably the base of the mapped data pages and of the
+ * whole mapping, and the blktap device descriptor -- confirm against the
+ * code that assigns them. */
+unsigned long mmap_vstart = 0;
+char *blktap_mem;
+int fd = 0;
+
+/* Sizes below are page counts: the ring pages plus one group of
+ * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) pages per ring slot. */
+#define BLKTAP_RING_PAGES 3 /* Ctrl, Back, Front */
+/*#define BLKTAP_MMAP_PAGES ((11 + 1) * 64)*/
+#define BLKTAP_MMAP_PAGES \
+ ((BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) * BLKIF_RING_SIZE)
+#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES)
+
+
+
+int bad_count = 0;
+/* Record one more failure; exit the process once more than 50 were seen. */
+void bad(void)
+{
+    if (++bad_count > 50)
+        exit(0);
+}
+/*-----[ ID Manipulation from tap driver code ]--------------------------*/
+
+#define ACTIVE_RING_IDX unsigned short
+
+/*
+ * Pack a frontend domain id and an active-ring index into one request id:
+ * the domain id is placed above bit 15, the index in the low 16 bits.
+ */
+inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
+{
+    /*
+     * Widen before shifting: both arguments are 16-bit unsigned values that
+     * would otherwise be promoted to (signed) int, where shifting a domain
+     * id >= 0x8000 left by 16 overflows.
+     */
+    return ( ((unsigned long)fe_dom << 16) | (unsigned long)idx );
+}
+
+/* Extract the ring index kept in the low 16 bits of a request id. */
+inline unsigned int ID_TO_IDX(unsigned long id)
+{
+    const unsigned long idx_mask = 0x0000ffff;
+
+    return (unsigned int)(id & idx_mask);
+}
+
+/* Extract the domain id kept above bit 15 of a request id. */
+inline domid_t ID_TO_DOM(unsigned long id)
+{
+    unsigned long dom_bits = id >> 16;
+
+    return dom_bits;
+}
+/*
+static int (*request_hook)(blkif_request_t *req) = NULL;
+static int (*response_hook)(blkif_response_t *req) = NULL;
+*/
+
+/*-----[ Request/Response hook chains.]----------------------------------*/
+
+#define HOOK_NAME_MAX 50
+
+typedef struct ctrl_hook_st {
+ char name[HOOK_NAME_MAX];
+ int (*func)(control_msg_t *);
+ struct ctrl_hook_st *next;
+} ctrl_hook_t;
+
+typedef struct request_hook_st {
+ char name[HOOK_NAME_MAX];
+ int (*func)(blkif_request_t *);
+ struct request_hook_st *next;
+} request_hook_t;
+
+typedef struct response_hook_st {
+ char name[HOOK_NAME_MAX];
+ int (*func)(blkif_response_t *);
+ struct response_hook_st *next;
+} response_hook_t;
+
+static ctrl_hook_t *ctrl_hook_chain = NULL;
+static request_hook_t *request_hook_chain = NULL;
+static response_hook_t *response_hook_chain = NULL;
+
+/*
+ * Append a control-message hook to the tail of the control hook chain.
+ *
+ * name: label stored with the hook (truncated to HOOK_NAME_MAX-1 chars).
+ * ch:   callback invoked with each control message.
+ *
+ * Exits the process if the hook entry cannot be allocated.
+ */
+void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *))
+{
+    ctrl_hook_t *entry;
+    ctrl_hook_t **tail = &ctrl_hook_chain;
+
+    entry = (ctrl_hook_t *)malloc(sizeof(ctrl_hook_t));
+    if (entry == NULL) {
+        printf("couldn't allocate a new hook\n");
+        exit(-1);
+    }
+
+    strncpy(entry->name, name, HOOK_NAME_MAX);
+    entry->name[HOOK_NAME_MAX-1] = '\0';
+    entry->func = ch;
+    entry->next = NULL;
+
+    /* Walk to the terminating NULL link and hang the new entry there. */
+    for ( ; *tail != NULL; tail = &(*tail)->next)
+        ;
+    *tail = entry;
+}
+
+/*
+ * Append a request hook to the tail of the request hook chain.
+ *
+ * name: label stored with the hook (truncated to HOOK_NAME_MAX-1 chars).
+ * rh:   callback invoked with each request read from the frontend ring.
+ *
+ * Exits the process if the hook entry cannot be allocated.
+ */
+void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *))
+{
+    request_hook_t *rh_ent, **c;
+
+    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
+    if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
+
+    rh_ent->func = rh;
+    rh_ent->next = NULL;
+    strncpy(rh_ent->name, name, HOOK_NAME_MAX);
+    /* strncpy does not terminate when name fills the buffer; the name is
+     * later printed with %s, so force termination. */
+    rh_ent->name[HOOK_NAME_MAX-1] = '\0';
+
+    c = &request_hook_chain;
+    while (*c != NULL) {
+        c = &(*c)->next;
+    }
+    *c = rh_ent;
+}
+
+/*
+ * Append a response hook to the tail of the response hook chain.
+ *
+ * name: label stored with the hook (truncated to HOOK_NAME_MAX-1 chars).
+ * rh:   callback invoked with each response before it is written to the
+ *       frontend ring.
+ *
+ * Exits the process if the hook entry cannot be allocated.
+ */
+void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *))
+{
+    response_hook_t *rh_ent, **c;
+
+    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
+    if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
+
+    rh_ent->func = rh;
+    rh_ent->next = NULL;
+    strncpy(rh_ent->name, name, HOOK_NAME_MAX);
+    /* strncpy does not terminate when name fills the buffer; the name is
+     * later printed with %s, so force termination. */
+    rh_ent->name[HOOK_NAME_MAX-1] = '\0';
+
+    c = &response_hook_chain;
+    while (*c != NULL) {
+        c = &(*c)->next;
+    }
+    *c = rh_ent;
+}
+
+/*
+ * Print every registered control, request and response hook (callback
+ * address and name) to stdout, in chain order.
+ */
+void print_hooks(void)
+{
+    request_hook_t *req_hook;
+    response_hook_t *rsp_hook;
+    ctrl_hook_t *ctrl_hook;
+
+    /*
+     * %p expects a 'void *', so the function pointers are cast explicitly.
+     * No literal "0x" is printed in front: glibc's %p already emits one.
+     */
+    printf("Control Hooks:\n");
+    for (ctrl_hook = ctrl_hook_chain; ctrl_hook != NULL;
+         ctrl_hook = ctrl_hook->next)
+    {
+        printf(" [%p] %s\n", (void *)ctrl_hook->func, ctrl_hook->name);
+    }
+
+    printf("Request Hooks:\n");
+    for (req_hook = request_hook_chain; req_hook != NULL;
+         req_hook = req_hook->next)
+    {
+        printf(" [%p] %s\n", (void *)req_hook->func, req_hook->name);
+    }
+
+    printf("Response Hooks:\n");
+    for (rsp_hook = response_hook_chain; rsp_hook != NULL;
+         rsp_hook = rsp_hook->next)
+    {
+        printf(" [%p] %s\n", (void *)rsp_hook->func, rsp_hook->name);
+    }
+}
+
+/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+/*
+ * Copy one request into be_ring at the private producer index and advance
+ * that index.  This function does not call RING_PUSH_REQUESTS itself.
+ *
+ * NOTE(review): no check is made here that the ring has a free slot.
+ *
+ * Always returns 0.
+ */
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+ blkif_request_t *req_d;
+
+ //req_d = FRONT_RING_NEXT_EMPTY_REQUEST(&be_ring);
+ req_d = RING_GET_REQUEST(BLKIF_RING, &be_ring, be_ring.req_prod_pvt);
+ memcpy(req_d, req, sizeof(blkif_request_t));
+ /* Barrier between filling the slot and bumping the producer index. */
+ wmb();
+ be_ring.req_prod_pvt++;
+
+ return 0;
+}
+
+/*
+ * Copy one response into fe_ring at the private producer index and advance
+ * that index.  This function does not call RING_PUSH_RESPONSES itself.
+ *
+ * NOTE(review): no check is made here that the ring has a free slot.
+ *
+ * Always returns 0.
+ */
+inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
+{
+ blkif_response_t *rsp_d;
+
+ //rsp_d = BACK_RING_NEXT_EMPTY_RESPONSE(&fe_ring);
+ rsp_d = RING_GET_RESPONSE(BLKIF_RING, &fe_ring, fe_ring.rsp_prod_pvt);
+ memcpy(rsp_d, rsp, sizeof(blkif_response_t));
+ /* Barrier between filling the slot and bumping the producer index. */
+ wmb();
+ fe_ring.rsp_prod_pvt++;
+
+ return 0;
+}
+
+/*
+ * Run every registered response hook, in chain order, on 'rsp'.
+ * Hooks are expected to return BLKTAP_PASS; any other value is only
+ * reported, and the remaining hooks still run.
+ */
+static void apply_rsp_hooks(blkif_response_t *rsp)
+{
+    response_hook_t *hook;
+
+    for (hook = response_hook_chain; hook != NULL; hook = hook->next) {
+        if (hook->func(rsp) != BLKTAP_PASS)
+            printf("Only PASS is supported for resp hooks!\n");
+    }
+}
+
+/*
+ * Deliver a response towards the frontend outside the main listen loop:
+ * run the response hooks on it, queue it on fe_ring, push the ring and
+ * issue the BLKTAP_IOCTL_KICK_FE ioctl on the tap device.
+ *
+ * NOTE(review): the ioctl result is not checked.
+ */
+void blktap_inject_response(blkif_response_t *rsp)
+{
+ apply_rsp_hooks(rsp);
+ write_rsp_to_fe_ring(rsp);
+ RING_PUSH_RESPONSES(BLKIF_RING, &fe_ring);
+ ioctl(fd, BLKTAP_IOCTL_KICK_FE);
+}
+
+/*-----[ Polling fd listeners ]------------------------------------------*/
+
+#define MAX_POLLFDS 64
+
+typedef struct {
+ int (*func)(int fd);
+ struct pollfd *pfd;
+ int fd;
+ short events;
+ int active;
+} pollhook_t;
+
+static struct pollfd pfd[MAX_POLLFDS+1];
+static pollhook_t pollhooks[MAX_POLLFDS];
+static unsigned int ph_freelist[MAX_POLLFDS];
+static unsigned int ph_cons, ph_prod;
+/* Number of poll hooks currently attached (slots taken off the free list). */
+#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
+/* Wrap a free-running counter into ph_freelist[]; the argument is
+ * parenthesised so that compound expressions expand correctly. */
+#define PH_IDX(x) ((x) % MAX_POLLFDS)
+
+/*
+ * Register 'func' to be called from the listen loop when 'fd' reports any
+ * of 'events' to poll().
+ *
+ * Returns 0 on success, -1 when all MAX_POLLFDS slots are in use.
+ */
+int blktap_attach_poll(int fd, short events, int (*func)(int fd))
+{
+    pollhook_t *ph;
+    unsigned int slot;
+
+    if (nr_pollhooks() == MAX_POLLFDS) {
+        printf("Too many pollhooks!\n");
+        return -1;
+    }
+
+    /* Take the next free slot number off the free-list ring. */
+    slot = ph_freelist[PH_IDX(ph_cons++)];
+    ph = &pollhooks[slot];
+
+    ph->func = func;
+    ph->fd = fd;
+    ph->events = events;
+    ph->active = 1;
+
+    /* Report the pollhooks[] slot actually used (not the free-list
+     * counter), with conversions matching the unsigned operands. */
+    printf("Added fd %d at ph index %u, now %u phs.\n", fd, slot,
+           nr_pollhooks());
+
+    return 0;
+}
+
+/*
+ * Remove the first active poll hook registered for 'fd' and return its slot
+ * to the free list.  Prints a diagnostic when no such hook exists.
+ */
+void blktap_detach_poll(int fd)
+{
+    int i;
+
+    for (i = 0; i < MAX_POLLFDS; i++) {
+        pollhook_t *ph = &pollhooks[i];
+
+        /*
+         * Match on the fd recorded at attach time.  ph->pfd is only filled
+         * in when the listen loop builds its poll list, so it may still be
+         * NULL (or refer to an older round) for a freshly attached hook.
+         */
+        if (!ph->active || ph->fd != fd)
+            continue;
+
+        ph_freelist[PH_IDX(ph_prod++)] = i;
+        /* Invalidate the pollfd entry only if it really belongs to us. */
+        if (ph->pfd != NULL && ph->pfd->fd == fd)
+            ph->pfd->fd = -1;
+        ph->active = 0;
+
+        printf("Removed fd %d at ph index %d, now %u phs.\n", fd, i,
+               nr_pollhooks());
+        return;
+    }
+
+    printf("No poll hook attached for fd %d.\n", fd);
+}
+
+/*
+ * Reset the poll-hook table: mark every slot inactive and fill the free
+ * list so that all MAX_POLLFDS slots are available.
+ */
+void pollhook_init(void)
+{
+    int slot = 0;
+
+    while (slot < MAX_POLLFDS) {
+        pollhooks[slot].active = 0;
+        ph_freelist[slot] = (slot + 1) % MAX_POLLFDS;
+        slot++;
+    }
+
+    /* prod - cons == MAX_POLLFDS means "every slot is free". */
+    ph_prod = MAX_POLLFDS;
+    ph_cons = 0;
+}
+
+/*
+ * Library constructor (GCC 'constructor' attribute): prints a banner and
+ * initialises the poll-hook table.
+ */
+void __attribute__ ((constructor)) blktaplib_init(void)
+{
+ printf("[[ C O N S T R U C T O R ]]\n");
+ pollhook_init();
+}
+
+/*-----[ The main listen loop ]------------------------------------------*/
+
+/*
+ * Main loop of the tap library.
+ *
+ * Opens /dev/blktap, maps the ring pages and data area, switches the driver
+ * to BLKTAP_MODE_INTERPOSE and then loops forever:
+ *   - poll()s the tap fd together with all attached poll hooks,
+ *   - dispatches poll hooks whose fd reported events,
+ *   - feeds control messages to the control hooks,
+ *   - runs request hooks on frontend requests and forwards the ones that
+ *     were not answered or stolen to the backend ring,
+ *   - runs response hooks on backend responses and forwards them to the
+ *     frontend ring,
+ *   - kicks the backend/frontend through ioctls as needed.
+ *
+ * The loop only ends if poll() fails with an error other than EINTR.
+ * Returns 0 in all cases, including failure to open or map the device
+ * (kept for compatibility with existing callers).
+ */
+int blktap_listen(void)
+{
+    int notify_be, notify_fe, tap_pfd;
+
+    /* comms rings: */
+    blkif_request_t *req;
+    blkif_response_t *rsp;
+    control_msg_t *msg;
+    blkif_sring_t *sring;
+    ctrl_sring_t *csring;
+    RING_IDX rp, i, pfd_count;
+
+    /* handler hooks: */
+    request_hook_t *req_hook;
+    ctrl_hook_t *ctrl_hook;
+
+    signal (SIGBUS, got_sig_bus);
+    signal (SIGINT, got_sig_int);
+
+    print_hooks();
+
+    fd = open("/dev/blktap", O_RDWR);
+    if (fd == -1) {
+        printf("open failed! (%d)\n", errno);
+        goto open_failed;
+    }
+
+    blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE,
+                      PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+    /* Compare against MAP_FAILED; casting the pointer to int truncates it
+     * wherever pointers are wider than int. */
+    if (blktap_mem == MAP_FAILED) {
+        printf("mmap failed! (%d)\n", errno);
+        goto mmap_failed;
+    }
+
+    /* assign the rings to the mapped memory */
+    csring = (ctrl_sring_t *)blktap_mem;
+    BACK_RING_INIT(CTRL_RING, &ctrl_ring, csring);
+
+    sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
+    FRONT_RING_INIT(BLKIF_RING, &be_ring, sring);
+
+    sring = (blkif_sring_t *)((unsigned long)blktap_mem + (2 *PAGE_SIZE));
+    BACK_RING_INIT(BLKIF_RING, &fe_ring, sring);
+
+    mmap_vstart = (unsigned long)blktap_mem + (BLKTAP_RING_PAGES << PAGE_SHIFT);
+
+    printf("fe_ring mapped at: %p\n", fe_ring.sring);
+    printf("be_ring mapped at: %p\n", be_ring.sring);
+
+    ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
+
+    while(1) {
+        int ret;
+
+        /* build the poll list */
+
+        DPRINTF("Building poll list.\n");
+
+        pfd_count = 0;
+        for ( i=0; i < MAX_POLLFDS; i++ ) {
+            pollhook_t *ph = &pollhooks[i];
+
+            if (ph->active) {
+                pfd[pfd_count].fd = ph->fd;
+                pfd[pfd_count].events = ph->events;
+                ph->pfd = &pfd[pfd_count];
+                pfd_count++;
+            }
+        }
+
+        /* The tap device itself always takes the last poll slot. */
+        tap_pfd = pfd_count;
+        pfd[tap_pfd].fd = fd;
+        pfd[tap_pfd].events = POLLIN;
+
+        DPRINTF("poll() %d fds.\n", pfd_count);
+
+        /* Store poll()'s real return value (not the result of comparing it
+         * with 0) so that timeouts and errors can be told apart. */
+        ret = poll(pfd, pfd_count+1, 10000);
+
+        if (ret == 0) {
+            /* timeout */
+            if (DEBUG_RING_IDXS)
+                ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
+            continue;
+        }
+
+        if (ret < 0) {
+            /* revents are not valid after a failed poll(). */
+            if (errno == EINTR)
+                continue;
+            printf("poll failed! (%d)\n", errno);
+            break;
+        }
+
+        DPRINTF("poll returned %d\n", ret);
+
+        for (i=0; i < MAX_POLLFDS; i++) {
+            pollhook_t *ph = &pollhooks[i];
+
+            /*
+             * A callback may attach or detach hooks while we iterate.  Only
+             * dispatch hooks whose pollfd entry exists and still carries
+             * this hook's fd, i.e. was filled in for this round.
+             */
+            if ( ph->active && (ph->pfd != NULL) &&
+                 (ph->pfd->fd == ph->fd) && ph->pfd->revents )
+                ph->func(ph->pfd->fd);
+        }
+
+        if (pfd[tap_pfd].revents) {
+
+            /* empty the control ring */
+            rp = ctrl_ring.sring->req_prod;
+            rmb();
+            /* '!=' rather than '<': ring indices are free-running and may
+             * wrap (same idiom as the two loops below). */
+            for (i = ctrl_ring.req_cons; i != rp; i++)
+            {
+                msg = RING_GET_REQUEST(CTRL_RING, &ctrl_ring, i);
+
+                ctrl_hook = ctrl_hook_chain;
+                while (ctrl_hook != NULL)
+                {
+                    DPRINTF("CTRL_HOOK: %s\n", ctrl_hook->name);
+                    /* We currently don't respond to ctrl messages. */
+                    ctrl_hook->func(msg);
+                    ctrl_hook = ctrl_hook->next;
+                }
+            }
+            /* Using this as a unidirectional ring. */
+            ctrl_ring.req_cons = ctrl_ring.rsp_prod_pvt = i;
+            RING_PUSH_RESPONSES(CTRL_RING, &ctrl_ring);
+
+            /* empty the fe_ring */
+            notify_fe = 0;
+            notify_be = RING_HAS_UNCONSUMED_REQUESTS(BLKIF_RING, &fe_ring);
+            rp = fe_ring.sring->req_prod;
+            rmb();
+            for (i = fe_ring.req_cons; i != rp; i++)
+            {
+                int done = 0; /* stop forwarding this request */
+
+                req = RING_GET_REQUEST(BLKIF_RING, &fe_ring, i);
+
+                DPRINTF("copying an fe request\n");
+
+                req_hook = request_hook_chain;
+                while (req_hook != NULL)
+                {
+                    DPRINTF("REQ_HOOK: %s\n", req_hook->name);
+                    switch(req_hook->func(req))
+                    {
+                    case BLKTAP_RESPOND:
+                        /* The hook turned the request into a reply in place. */
+                        apply_rsp_hooks((blkif_response_t *)req);
+                        write_rsp_to_fe_ring((blkif_response_t *)req);
+                        notify_fe = 1;
+                        done = 1;
+                        break;
+                    case BLKTAP_STOLEN:
+                        done = 1;
+                        break;
+                    case BLKTAP_PASS:
+                        break;
+                    default:
+                        printf("Unknown request hook return value!\n");
+                    }
+                    if (done) break;
+                    req_hook = req_hook->next;
+                }
+
+                if (done == 0) write_req_to_be_ring(req);
+
+            }
+            fe_ring.req_cons = i;
+
+            /* empty the be_ring */
+            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(BLKIF_RING, &be_ring);
+            rp = be_ring.sring->rsp_prod;
+            rmb();
+            for (i = be_ring.rsp_cons; i != rp; i++)
+            {
+
+                rsp = RING_GET_RESPONSE(BLKIF_RING, &be_ring, i);
+
+                DPRINTF("copying a be request\n");
+
+                apply_rsp_hooks(rsp);
+                write_rsp_to_fe_ring(rsp);
+            }
+            be_ring.rsp_cons = i;
+
+            /* notify the domains */
+
+            if (notify_be) {
+                DPRINTF("notifying be\n");
+                RING_PUSH_REQUESTS(BLKIF_RING, &be_ring);
+                ioctl(fd, BLKTAP_IOCTL_KICK_BE);
+            }
+
+            if (notify_fe) {
+                DPRINTF("notifying fe\n");
+                RING_PUSH_RESPONSES(BLKIF_RING, &fe_ring);
+                ioctl(fd, BLKTAP_IOCTL_KICK_FE);
+            }
+        }
+    }
+
+    /* Unmap exactly what was mapped above. */
+    munmap(blktap_mem, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE);
+
+ mmap_failed:
+    close(fd);
+    /* Mark the descriptor invalid so the SIGINT handler will not use it. */
+    fd = -1;
+
+ open_failed:
+    return 0;
+}
+
+/* SIGBUS handler installed by blktap_listen(): print a message and exit
+ * with status -1. */
+void got_sig_bus() {
+ printf("Attempted to access a page that isn't.\n");
+ exit(-1);
+}
+
+/*
+ * SIGINT handler installed by blktap_listen(): if the tap fd looks open
+ * (fd > 0), switch the driver back to BLKTAP_MODE_PASSTHROUGH, then exit
+ * with status 0.
+ */
+void got_sig_int() {
+ printf("quitting -- returning to passthrough mode.\n");
+ if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
+ exit(0);
+}
--- /dev/null
+/* blktaplib.h
+ *
+ * userland accessors to the block tap.
+ *
+ * for the moment this is rather simple.
+ */
+
+#ifndef __BLKTAPLIB_H__
+#define __BLKTAPLIB_H__
+
+#include <stdint.h>
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+
+/*
+ * Memory barriers used by the ring code.  Only i386 is provided; any other
+ * architecture stops at the #error below.
+ *   rmb(): locked add of zero to the word at the top of the stack, with a
+ *          "memory" clobber.
+ *   wmb(): empty asm with a "memory" clobber (compiler barrier only).
+ */
+#if defined(__i386__)
+#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
+#define wmb() __asm__ __volatile__ ( "" : : : "memory" )
+#else
+#error "Define barriers"
+#endif
+
+#include <sys/user.h>
+#include <xen/xen.h>
+#include <xen/io/blkif.h>
+#include <xen/io/ring.h>
+#include <xen/io/domain_controller.h>
+#include "blkint.h"
+
+#define BLKTAP_PASS 0 /* Keep passing this request as normal. */
+#define BLKTAP_RESPOND 1 /* Request is now a reply. Return it. */
+#define BLKTAP_STOLEN 2 /* Hook has stolen request. */
+
+#define domid_t unsigned short
+
+inline unsigned int ID_TO_IDX(unsigned long id);
+inline domid_t ID_TO_DOM(unsigned long id);
+
+void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
+void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
+void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
+void blktap_inject_response(blkif_response_t *);
+int blktap_attach_poll(int fd, short events, int (*func)(int));
+void blktap_detach_poll(int fd);
+int blktap_listen(void);
+
+/*-----[ Accessing attached data page mappings ]-------------------------*/
+/* Pages per request slot: BLKIF_MAX_SEGMENTS_PER_REQUEST plus one. */
+#define MMAP_PAGES_PER_REQUEST \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
+/* Address of page _seg of request slot _req, counted from mmap_vstart. */
+#define MMAP_VADDR(_req,_seg) \
+ (mmap_vstart + \
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
+
+extern unsigned long mmap_vstart;
+
+
+/*-----[ Defines that are only used by library clients ]-----------------*/
+
+#ifndef __COMPILING_BLKTAP_LIB
+
+/*
+ * Printable names indexed by BLKIF_OP_* value.  Declared static in the
+ * header, so every file that includes it gets its own copy.
+ */
+static char *blkif_op_name[] = {
+ [BLKIF_OP_READ] = "READ",
+ [BLKIF_OP_WRITE] = "WRITE",
+ [BLKIF_OP_PROBE] = "PROBE",
+};
+
+#endif /* __COMPILING_BLKTAP_LIB */
+
+#endif /* __BLKTAPLIB_H__ */
--- /dev/null
+
+# Warnings are errors; keep debug info in both objects and the link.
+CFLAGS += -Wall -Werror -g
+LDFLAGS += -g
+
+# Static archive holding the gnbd client library.
+# (The recipe line must start with a hard TAB.)
+libgnbd.a: libgnbd.o
+	$(AR) r $@ $<
+
+# Test client; linked by make's built-in rule from the object and archive.
+gnbdtest: gnbdtest.o libgnbd.a
--- /dev/null
+
+#include <err.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/poll.h>
+
+#include "libgnbd.h"
+
+#define PRINTF(x) printf x
+#if 0
+#define DFPRINTF(x...) fprintf(stderr, ##x)
+#define DPRINTF(x) DFPRINTF x
+#else
+#define DPRINTF(x)
+#endif
+
+static unsigned char buf1[8 << 9];
+static unsigned char buf2[8 << 9];
+static unsigned char buf3[8 << 9];
+
+/*
+ * Small test client for libgnbd: connects to a hard-coded server, waits
+ * for the login to finish, then issues three 8-sector reads and reports
+ * problems through warnx().
+ */
+int
+main(int argc, char **argv)
+{
+    struct gnbd_handle *gh;
+    struct pollfd pfd[1];
+    int err, tout;
+
+    gh = gnbd_setup("panik", 0x38e7, "cl349-nahant-beta2-root1",
+                    "arcadians.cl.cam.ac.uk");
+    if (gh == NULL)
+        errx(1, "gnbd_setup");
+
+    memset(pfd, 0, sizeof(pfd));
+    pfd[0].fd = gnbd_fd(gh);
+    pfd[0].events = POLLIN;
+
+    /*
+     * Block in poll() until the socket has something for us.  A zero
+     * timeout combined with 'continue' on tout == 0 would spin at 100% CPU
+     * while idle.
+     */
+    while ((tout = poll(pfd, 1, -1)) >= 0) {
+        if (tout == 0)
+            continue;
+        DPRINTF(("event\n"));
+        if (pfd[0].revents) {
+            err = gnbd_reply(gh);
+            pfd[0].events = POLLIN;
+            switch (err) {
+            case GNBD_LOGIN_DONE:
+                DPRINTF(("sectors: %08llu\n",
+                         gnbd_sectors(gh)));
+                err = gnbd_read(gh, 8, 8, buf2, 1);
+                if (err)
+                    warnx("gnbd_read");
+                err = gnbd_read(gh, 0, 8, buf1, 0);
+                if (err)
+                    warnx("gnbd_read");
+                err = gnbd_read(gh, 16, 8, buf3, 2);
+                if (err)
+                    warnx("gnbd_read");
+                break;
+            case GNBD_REQUEST_DONE:
+                DPRINTF(("request done %ld\n",
+                         gnbd_finished_request(gh)));
+                /* Write-back experiment, deliberately disabled. */
+                if (0 && gnbd_finished_request(gh) == 0) {
+                    write(1, buf1, 8 << 9);
+                    err = gnbd_write(gh, 0, 8, buf1, 10);
+                    if (err)
+                        warnx("gnbd_write");
+                }
+                break;
+            case GNBD_CONTINUE:
+                DPRINTF(("continue\n"));
+                break;
+            case 0:
+                break;
+            case GNBD_CONTINUE_WRITE:
+                DPRINTF(("continue write\n"));
+                pfd[0].events |= POLLOUT;
+                break;
+            default:
+                warnx("gnbd_reply error");
+                break;
+            }
+            DPRINTF(("got gnbd reply\n"));
+        }
+    }
+
+    /* poll() failed: release the handle before leaving. */
+    gnbd_close(gh);
+
+    return 0;
+}
--- /dev/null
+/* libgnbd.c
+ *
+ * gnbd client library
+ *
+ * Copyright (c) 2005, Christian Limpach
+ */
+
+#include <byteswap.h>
+#include <endian.h>
+#include <err.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <stdio.h>
+
+#include "libgnbd.h"
+
+#define PROTOCOL_VERSION 2
+
+#define EXTERN_KILL_GSERV_REQ 5
+#define EXTERN_LOGIN_REQ 6
+
+#define GNBD_REQUEST_MAGIC 0x37a07e00
+#define GNBD_KEEP_ALIVE_MAGIC 0x5b46d8c2
+#define GNBD_REPLY_MAGIC 0x41f09370
+
+enum {
+ GNBD_CMD_READ = 0,
+ GNBD_CMD_WRITE = 1,
+ GNBD_CMD_DISC = 2,
+ GNBD_CMD_PING = 3
+};
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define htonll(x) (x)
+#define ntohll(x) (x)
+#endif
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define htonll(x) bswap_64(x)
+#define ntohll(x) bswap_64(x)
+#endif
+
+#define PRINTF(x) printf x
+#if 0
+#define DFPRINTF(x...) fprintf(stderr, ##x)
+#define DPRINTF(x) DFPRINTF x
+#else
+#define DPRINTF(x)
+#endif
+
+/* One read or write request submitted through gnbd_read()/gnbd_write(). */
+struct gnbd_request {
+ struct gnbd_request *gr_next; /* next entry in the outstanding list */
+ unsigned char *gr_buf; /* caller-supplied data buffer */
+ ssize_t gr_size; /* total transfer size in bytes (count << 9) */
+ ssize_t gr_done; /* bytes transferred so far */
+ unsigned long gr_cookie; /* caller's cookie, reported on completion */
+};
+
+/* Per-connection state returned by gnbd_setup(). */
+struct gnbd_handle {
+ int gh_fd; /* socket, or -1 when not connected */
+ unsigned int gh_flags; /* GHF_* bits below */
+ uint64_t gh_sectors; /* sector count taken from the login reply */
+ char gh_devname[32]; /* device name sent in kill/login requests */
+ char gh_nodename[65]; /* node name sent in kill/login requests */
+ struct sockaddr_in gh_sin; /* server address used by connect() */
+ struct gnbd_request *gh_outstanding_requests; /* head of outstanding list */
+ struct gnbd_request **gh_outstanding_requests_last; /* link to append at */
+ struct gnbd_request *gh_incoming_request; /* request whose data is being read */
+ unsigned long gh_finished_request; /* cookie of the last finished request */
+};
+/* gh_flags: which reply gnbd_reply() should expect next. */
+#define GHF_EXPECT_KILL_GSERV_REPLY 0x0001
+#define GHF_EXPECT_LOGIN_REPLY 0x0002
+#define GHF_INCOMING_REQUEST 0x0004
+
+/* Device name record, written by kill_gserv(). */
+struct device_req {
+ char name[32];
+};
+
+/* Node name record, written by kill_gserv() and login(). */
+struct node_req {
+ char node_name[65];
+};
+
+/* Login request record, written by login(); multi-byte fields are
+ * converted with htonll()/htons() before sending. */
+struct login_req {
+ uint64_t timestamp;
+ uint16_t version;
+ uint8_t pad[6];
+ char devname[32];
+};
+
+/* Login reply record, read by login_reply(). */
+struct login_reply {
+ uint64_t sectors;
+ uint16_t version;
+ uint8_t err;
+ uint8_t pad[5];
+};
+
+/* Header written in front of every read/write request; 'handle' carries
+ * the client-side request pointer and is copied back from the reply. */
+struct gnbd_server_request {
+ uint32_t magic;
+ uint32_t type;
+ char handle[8];
+ uint64_t from;
+ uint32_t len;
+} __attribute__ ((packed));
+
+/* Header read by gnbd_reply() for every server response. */
+struct gnbd_server_reply {
+ uint32_t magic;
+ uint32_t error;
+ char handle[8];
+} __attribute__ ((packed));
+
+/*
+ * Read from 'fd' into 'buf'.
+ *
+ * read_count != NULL: partial mode.  Performs one read() (retried if it
+ *     is interrupted) and stores the number of bytes received, which may
+ *     be less than 'count' and is 0 at end of file.
+ * read_count == NULL: exact mode.  Keeps reading until 'count' bytes have
+ *     arrived; end of file before that is an error.
+ *
+ * Returns 0 on success and a non-zero errno-style value on failure.
+ */
+static int
+read_buf(int fd, void *buf, size_t count, size_t *read_count)
+{
+    unsigned char *p = buf;
+    size_t got = 0;
+    ssize_t n;
+
+    if (read_count) {
+        do {
+            n = read(fd, buf, count);
+        } while (n < 0 && errno == EINTR);
+        if (n < 0)
+            return errno ? errno : EIO;
+        *read_count = (size_t)n;
+        return 0;
+    }
+
+    /* A stream socket may hand the data over in several pieces; a short
+     * read is not a failure, so keep going until everything is in. */
+    while (got < count) {
+        n = read(fd, p + got, count - got);
+        if (n < 0) {
+            if (errno == EINTR)
+                continue;
+            return errno ? errno : EIO;
+        }
+        if (n == 0)
+            return EIO; /* peer closed the connection early */
+        got += (size_t)n;
+    }
+    return 0;
+}
+
+/*
+ * Read one 32-bit big-endian value from 'fd' and store it, converted to
+ * host order, in *val.  Returns 0 on success, non-zero on failure (*val is
+ * then left untouched).
+ */
+static int
+read_4(int fd, unsigned long *val)
+{
+    /* Exactly four bytes on the wire: 'unsigned long' is eight bytes on
+     * LP64 targets and would read past the field. */
+    uint32_t buf;
+    int err;
+
+    err = read_buf(fd, &buf, sizeof(buf), NULL);
+    if (err == 0)
+        *val = ntohl(buf);
+    return err;
+}
+
+/*
+ * Write all 'count' bytes of 'buf' to 'fd'.
+ *
+ * Returns 0 once everything has been written, 1 on failure.
+ */
+static int
+write_buf(int fd, void *buf, size_t count)
+{
+    const unsigned char *p = buf;
+    size_t sent = 0;
+    ssize_t n;
+
+    /* write() may accept only part of the data; keep going so that a short
+     * write does not silently truncate the message. */
+    while (sent < count) {
+        n = write(fd, p + sent, count - sent);
+        if (n < 0) {
+            if (errno == EINTR)
+                continue;
+            return 1;
+        }
+        if (n == 0)
+            return 1; /* no progress possible */
+        sent += (size_t)n;
+    }
+    return 0;
+}
+
+/*
+ * Write 'val' to 'fd' as one 32-bit big-endian value.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+write_4(int fd, unsigned long val)
+{
+    /* Exactly four bytes on the wire: 'unsigned long' is eight bytes on
+     * LP64 targets and would send four extra bytes. */
+    uint32_t buf;
+    int err;
+
+    buf = htonl((uint32_t)val);
+    err = write_buf(fd, &buf, sizeof(buf));
+    return err;
+}
+
+
+/*
+ * Make sure the handle has a connected TCP socket to gh->gh_sin.
+ *
+ * Returns 0 when a socket is already open or the connection succeeds.
+ * On failure the socket()/connect() result is returned and gh_fd is left
+ * below zero.
+ */
+static int
+socket_connect(struct gnbd_handle *gh)
+{
+    int rc;
+
+    /* Already connected: nothing to do. */
+    if (gh->gh_fd >= 0)
+        return 0;
+
+    gh->gh_fd = socket(PF_INET, SOCK_STREAM, 0);
+    if (gh->gh_fd < 0) {
+        warn("socket");
+        return gh->gh_fd;
+    }
+
+    rc = connect(gh->gh_fd, (struct sockaddr *)&gh->gh_sin,
+                 sizeof(gh->gh_sin));
+    if (rc == 0)
+        return 0;
+
+    /* Connection failed: report, then drop the half-set-up socket. */
+    warn("connect");
+    close (gh->gh_fd);
+    gh->gh_fd = -1;
+    return rc;
+}
+
+/*
+ * Close the handle's socket and mark the handle as disconnected
+ * (gh_fd = -1).  Always returns 0; the close() result is ignored.
+ */
+static int
+socket_shutdown(struct gnbd_handle *gh)
+{
+
+ close (gh->gh_fd);
+ gh->gh_fd = -1;
+ return 0;
+}
+
+/*
+ * Look for 'gr' in the outstanding-request list and unlink it.
+ *
+ * Only pointer values are compared; 'gr' is not dereferenced unless it is
+ * found in the list.  Returns 0 when found and removed, ENOENT otherwise.
+ */
+static int
+find_request(struct gnbd_handle *gh, struct gnbd_request *gr)
+{
+    struct gnbd_request **link = &gh->gh_outstanding_requests;
+
+    while (*link != NULL) {
+        if (*link != gr) {
+            link = &(*link)->gr_next;
+            continue;
+        }
+
+        /* Unlink; if this was the last entry, the tail moves back. */
+        *link = gr->gr_next;
+        if (*link == NULL)
+            gh->gh_outstanding_requests_last = link;
+        return 0;
+    }
+
+    return ENOENT;
+}
+
+/*
+ * Connect to the server and send an EXTERN_KILL_GSERV_REQ for this
+ * handle's device and node name.  On success the handle is flagged to
+ * expect the matching reply.
+ *
+ * Returns 0 on success; on failure the socket is closed and a non-zero
+ * value is returned.
+ */
+static int
+kill_gserv(struct gnbd_handle *gh)
+{
+    struct device_req dr;
+    struct node_req nr;
+    int err;
+
+    DPRINTF(("gnbd_kill_gserv\n"));
+    err = socket_connect(gh);
+    if (err) {
+        warnx("socket_connect");
+        return err;
+    }
+
+    err = write_4(gh->gh_fd, EXTERN_KILL_GSERV_REQ);
+    if (err) {
+        /* Name the request that was actually being sent. */
+        warnx("send EXTERN_KILL_GSERV_REQ failed");
+        goto out;
+    }
+
+    /* strncpy zero-pads the whole fixed-width field. */
+    strncpy(dr.name, gh->gh_devname, sizeof(dr.name));
+    err = write_buf(gh->gh_fd, &dr, sizeof(dr));
+    if (err) {
+        warnx("send device_req failed");
+        goto out;
+    }
+
+    strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name));
+    err = write_buf(gh->gh_fd, &nr, sizeof(nr));
+    if (err) {
+        warnx("send node_req failed");
+        goto out;
+    }
+
+    gh->gh_flags |= GHF_EXPECT_KILL_GSERV_REPLY;
+    DPRINTF(("gnbd_kill_gserv ok\n"));
+
+    return 0;
+ out:
+    socket_shutdown(gh);
+    return err;
+}
+
+/*
+ * Connect to the server and send an EXTERN_LOGIN_REQ carrying the current
+ * time, the protocol version, the device name and the node name.  On
+ * success the handle is flagged to expect the login reply.
+ *
+ * Returns 0 on success; on failure the socket is closed and a non-zero
+ * value is returned.
+ */
+static int
+login(struct gnbd_handle *gh)
+{
+    struct login_req lr;
+    struct node_req nr;
+    int err;
+    uint64_t timestamp;
+    struct timeval tv;
+
+    DPRINTF(("gnbd_login\n"));
+    err = socket_connect(gh);
+    if (err) {
+        warnx("socket_connect");
+        return err;
+    }
+
+    err = write_4(gh->gh_fd, EXTERN_LOGIN_REQ);
+    if (err) {
+        warnx("send EXTERN_LOGIN_REQ failed");
+        goto out;
+    }
+
+    err = gettimeofday(&tv, NULL);
+    if (err) {
+        warnx("gettimeofday");
+        goto out;
+    }
+    timestamp = (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec;
+
+    /* Clear the whole record first so that the pad bytes do not carry
+     * uninitialised stack contents onto the wire. */
+    memset(&lr, 0, sizeof(lr));
+    lr.timestamp = htonll(timestamp);
+    lr.version = htons(PROTOCOL_VERSION);
+    strncpy(lr.devname, gh->gh_devname, sizeof(lr.devname));
+    err = write_buf(gh->gh_fd, &lr, sizeof(lr));
+    if (err) {
+        warnx("send login_req failed");
+        goto out;
+    }
+
+    strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name));
+    err = write_buf(gh->gh_fd, &nr, sizeof(nr));
+    if (err) {
+        warnx("send node_req failed");
+        goto out;
+    }
+
+    gh->gh_flags |= GHF_EXPECT_LOGIN_REPLY;
+
+    DPRINTF(("gnbd_login ok\n"));
+    return 0;
+ out:
+    socket_shutdown(gh);
+    return err;
+}
+
+/*
+ * Consume the server's answer to the kill-gserv request.  A status of 0
+ * or ENODEV is accepted; the connection is then closed and a login is
+ * started on a new connection.
+ *
+ * Returns 0 when the login request went out, otherwise a non-zero error
+ * (read failure, server status, or login failure).
+ */
+static int
+kill_gserv_reply(struct gnbd_handle *gh)
+{
+    unsigned long status;
+    int rc;
+
+    DPRINTF(("read gnbd_kill_gserv_reply\n"));
+    rc = read_4(gh->gh_fd, &status);
+    if (rc != 0) {
+        warnx("read kill_gserv_reply failed");
+        return rc;
+    }
+
+    if (status != 0 && status != ENODEV) {
+        warnx("kill gserv failed: %s", strerror(status));
+        return status;
+    }
+
+    gh->gh_flags &= ~GHF_EXPECT_KILL_GSERV_REPLY;
+    socket_shutdown(gh);
+
+    rc = login(gh);
+    if (rc != 0)
+        warnx("gnbd_login");
+
+    return rc;
+}
+
+/*
+ * Consume the server's login reply.
+ *
+ * Returns GNBD_LOGIN_DONE after recording the sector count, EINVAL when
+ * the server reports an error together with a version, the server's error
+ * code for any other refusal, or the read error.
+ */
+static int
+login_reply(struct gnbd_handle *gh)
+{
+    struct login_reply reply;
+    int rc;
+
+    DPRINTF(("read gnbd_login_reply\n"));
+    rc = read_buf(gh->gh_fd, &reply, sizeof(reply), NULL);
+    if (rc) {
+        warnx("read login_reply failed");
+        return rc;
+    }
+
+    if (reply.err == 0) {
+        gh->gh_sectors = ntohll(reply.sectors);
+        gh->gh_flags &= ~GHF_EXPECT_LOGIN_REPLY;
+        return GNBD_LOGIN_DONE;
+    }
+
+    if (reply.version) {
+        warnx("gnbd version mismatch %04x != %04x",
+              PROTOCOL_VERSION, ntohs(reply.version));
+        return EINVAL;
+    }
+
+    warnx("login refused: %s", strerror(reply.err));
+    return reply.err;
+}
+
+/*
+ * Continue receiving the data of the read request stored in
+ * gh->gh_incoming_request.
+ *
+ * Returns GNBD_REQUEST_DONE once the whole buffer has arrived (the request
+ * is freed and its cookie recorded), GNBD_CONTINUE when more data is still
+ * expected, or a non-zero error after which the request has been dropped
+ * and the finished-request cookie reset to 0.
+ */
+static int
+incoming_request(struct gnbd_handle *gh)
+{
+    struct gnbd_request *gr = gh->gh_incoming_request;
+    size_t done = 0; /* read_buf() reports the byte count as size_t */
+    int err;
+
+    DPRINTF(("incoming_request: done %d size %d\n", gr->gr_done,
+             gr->gr_size));
+    err = read_buf(gh->gh_fd, gr->gr_buf + gr->gr_done,
+                   gr->gr_size - gr->gr_done, &done);
+    if (err)
+        goto out;
+
+    /*
+     * More than zero bytes were requested, so zero bytes received means
+     * the peer closed the connection.  Returning GNBD_CONTINUE here would
+     * make the caller poll and retry forever.
+     */
+    if (done == 0) {
+        err = EIO;
+        goto out;
+    }
+
+    DPRINTF(("incoming_request: got %d\n", done));
+    gr->gr_done += done;
+    if (gr->gr_done == gr->gr_size) {
+        gh->gh_flags &= ~GHF_INCOMING_REQUEST;
+        gh->gh_finished_request = gr->gr_cookie;
+        free(gr);
+        return GNBD_REQUEST_DONE;
+    }
+
+    return GNBD_CONTINUE;
+
+ out:
+    gh->gh_flags &= ~GHF_INCOMING_REQUEST;
+    gh->gh_finished_request = 0;
+    free(gr);
+    return err;
+}
+
+
+
+/*
+ * Release a handle: free all outstanding requests and any partially
+ * received request, close the socket if one is open, and free the handle.
+ *
+ * Returns the result of close(), or 0 when no socket was open.
+ */
+int
+gnbd_close(struct gnbd_handle *gh)
+{
+    int err = 0;
+    struct gnbd_request *gr, *next;
+
+    /* Fetch the successor before freeing a node; reading gr_next after
+     * free() is a use-after-free. */
+    for (gr = gh->gh_outstanding_requests; gr != NULL; gr = next) {
+        next = gr->gr_next;
+        free(gr);
+    }
+
+    if (gh->gh_flags & GHF_INCOMING_REQUEST)
+        free(gh->gh_incoming_request);
+
+    /* gh_fd is -1 while disconnected; do not close() that. */
+    if (gh->gh_fd >= 0) {
+        err = close(gh->gh_fd);
+        if (err)
+            warnx("close");
+    }
+    free(gh);
+
+    return err;
+}
+
+/* Return the handle's socket descriptor (gh_fd), e.g. for use with poll(). */
+int
+gnbd_fd(struct gnbd_handle *gh)
+{
+ return gh->gh_fd;
+}
+
+/* Return the cookie recorded for the most recently finished request. */
+unsigned long
+gnbd_finished_request(struct gnbd_handle *gh)
+{
+ return gh->gh_finished_request;
+}
+
+/*
+ * Submit a read of 'count' 512-byte sectors starting at 'sector' into
+ * 'buf'.  Completion is reported later through gnbd_reply(); 'cookie' is
+ * then available from gnbd_finished_request().
+ *
+ * Returns 0 when the request was sent, EINVAL for a negative count,
+ * ENOMEM when no request record could be allocated, or a non-zero value
+ * when sending failed.
+ */
+int
+gnbd_read(struct gnbd_handle *gh, uint64_t sector, ssize_t count,
+    unsigned char *buf, unsigned long cookie)
+{
+    struct gnbd_server_request gsr;
+    struct gnbd_request *gr;
+    int err;
+
+    if (count < 0)
+        return EINVAL;
+
+    gr = malloc(sizeof(struct gnbd_request));
+    if (gr == NULL)
+        return ENOMEM;
+    /* Clear the whole record: sizeof(gr) is only the size of the pointer. */
+    memset(gr, 0, sizeof(*gr));
+
+    gr->gr_buf = buf;
+    gr->gr_size = count << 9;
+    gr->gr_done = 0;
+    gr->gr_cookie = cookie;
+
+    gsr.magic = htonl(GNBD_REQUEST_MAGIC);
+    gsr.type = htonl(GNBD_CMD_READ);
+    gsr.from = htonll(sector << 9);
+    gsr.len = htonl(gr->gr_size);
+    /* The request pointer travels as an opaque handle and comes back in
+     * the reply. */
+    memset(gsr.handle, 0, sizeof(gsr.handle));
+    memcpy(gsr.handle, &gr, sizeof(gr));
+
+    err = write_buf(gh->gh_fd, &gsr, sizeof(gsr));
+    if (err) {
+        warnx("write_buf");
+        goto out;
+    }
+
+    /* Append to the outstanding list. */
+    *gh->gh_outstanding_requests_last = gr;
+    gh->gh_outstanding_requests_last = &gr->gr_next;
+
+    return 0;
+
+ out:
+    free(gr);
+    return err;
+}
+
+/*
+ * Submit a write of 'count' 512-byte sectors from 'buf' starting at
+ * 'sector'.  Header and data are sent immediately; completion is reported
+ * later through gnbd_reply(); 'cookie' is then available from
+ * gnbd_finished_request().
+ *
+ * Returns 0 when the request was sent, EINVAL for a negative count,
+ * ENOMEM when no request record could be allocated, or a non-zero value
+ * when sending failed.
+ */
+int
+gnbd_write(struct gnbd_handle *gh, uint64_t sector, ssize_t count,
+    unsigned char *buf, unsigned long cookie)
+{
+    struct gnbd_server_request gsr;
+    struct gnbd_request *gr;
+    int err;
+
+    if (count < 0)
+        return EINVAL;
+
+    gr = malloc(sizeof(struct gnbd_request));
+    if (gr == NULL)
+        return ENOMEM;
+    /* Clear the whole record: sizeof(gr) is only the size of the pointer. */
+    memset(gr, 0, sizeof(*gr));
+
+    gr->gr_buf = buf;
+    gr->gr_size = count << 9;
+    gr->gr_done = 0;
+    gr->gr_cookie = cookie;
+
+    gsr.magic = htonl(GNBD_REQUEST_MAGIC);
+    gsr.type = htonl(GNBD_CMD_WRITE);
+    gsr.from = htonll(sector << 9);
+    gsr.len = htonl(gr->gr_size);
+    /* The request pointer travels as an opaque handle and comes back in
+     * the reply. */
+    memset(gsr.handle, 0, sizeof(gsr.handle));
+    memcpy(gsr.handle, &gr, sizeof(gr));
+
+    err = write_buf(gh->gh_fd, &gsr, sizeof(gsr));
+    if (err) {
+        warnx("write_buf");
+        goto out;
+    }
+
+    /* XXX handle non-blocking socket */
+    err = write_buf(gh->gh_fd, buf, gr->gr_size);
+    if (err) {
+        warnx("write_buf");
+        goto out;
+    }
+    /* All data is out, so the reply completes the request directly. */
+    gr->gr_done += gr->gr_size;
+
+    /* Append to the outstanding list. */
+    *gh->gh_outstanding_requests_last = gr;
+    gh->gh_outstanding_requests_last = &gr->gr_next;
+
+    DPRINTF(("write done\n"));
+
+    return 0;
+
+ out:
+    free(gr);
+    return err;
+}
+
+/*
+ * Process whatever the server sent next.  Depending on the handle's state
+ * this is the kill-gserv reply, the login reply, more data for a read in
+ * progress, or a new reply header.
+ *
+ * Returns one of the GNBD_* codes from libgnbd.h, 0 (from the kill-gserv
+ * path once the login request has been sent), or a non-zero errno-style
+ * value on failure.
+ */
+int
+gnbd_reply(struct gnbd_handle *gh)
+{
+    struct gnbd_server_reply gsr;
+    struct gnbd_request *gr;
+    uint32_t srv_err;
+    int err;
+
+    DPRINTF(("gnbd_reply flags %x\n", gh->gh_flags));
+    if ((gh->gh_flags & GHF_EXPECT_KILL_GSERV_REPLY))
+        return kill_gserv_reply(gh);
+    if ((gh->gh_flags & GHF_EXPECT_LOGIN_REPLY))
+        return login_reply(gh);
+    if ((gh->gh_flags & GHF_INCOMING_REQUEST))
+        return incoming_request(gh);
+
+    DPRINTF(("read response\n"));
+    err = read_buf(gh->gh_fd, &gsr, sizeof(gsr), NULL);
+    if (err) {
+        warnx("read gnbd_reply failed");
+        return err;
+    }
+
+    /* Convert once and use the host-order value everywhere; the raw field
+     * is byte-swapped on little-endian hosts. */
+    srv_err = ntohl(gsr.error);
+    if (srv_err) {
+        warnx("gnbd server reply error: %s", strerror(srv_err));
+        return srv_err;
+    }
+
+    switch (ntohl(gsr.magic)) {
+    case GNBD_KEEP_ALIVE_MAGIC:
+        DPRINTF(("read keep alive magic\n"));
+        return GNBD_CONTINUE;
+    case GNBD_REPLY_MAGIC:
+        DPRINTF(("read reply magic\n"));
+        /* The handle is the request pointer we sent; find_request() only
+         * accepts it if it is on our outstanding list. */
+        memcpy(&gr, gsr.handle, sizeof(gr));
+        err = find_request(gh, gr);
+        if (err) {
+            warnx("unknown request");
+            return err;
+        }
+        if (gr->gr_done != gr->gr_size) {
+            /* Data follows the header; receive it on later calls. */
+            gh->gh_incoming_request = gr;
+            gh->gh_flags |= GHF_INCOMING_REQUEST;
+            return GNBD_CONTINUE;
+        } else {
+            gh->gh_finished_request = gr->gr_cookie;
+            free(gr);
+            return GNBD_REQUEST_DONE;
+        }
+    default:
+        break;
+    }
+
+    return GNBD_CONTINUE;
+}
+
+/* Return the sector count stored in the handle by the login reply. */
+uint64_t
+gnbd_sectors(struct gnbd_handle *gh)
+{
+
+ return gh->gh_sectors;
+}
+
+/*
+ * Create a handle for device 'devname' on 'server':'port', identifying
+ * ourselves as 'nodename', and start the handshake by sending the
+ * kill-gserv request.  The caller drives the rest of the handshake by
+ * calling gnbd_reply() when the socket becomes readable.
+ *
+ * Returns the new handle, or NULL on allocation, name-resolution or
+ * connection failure.
+ */
+struct gnbd_handle *
+gnbd_setup(char *server, unsigned int port, char *devname, char *nodename)
+{
+    struct gnbd_handle *gh;
+    struct addrinfo *res = NULL, *ai;
+    int err;
+
+    gh = malloc(sizeof(struct gnbd_handle));
+    if (gh == NULL)
+        return NULL;
+    /* Clear the whole handle: sizeof(gh) is only the size of the pointer. */
+    memset(gh, 0, sizeof(*gh));
+    gh->gh_fd = -1;
+    gh->gh_outstanding_requests_last = &gh->gh_outstanding_requests;
+
+    strncpy(gh->gh_devname, devname, sizeof(gh->gh_devname));
+    strncpy(gh->gh_nodename, nodename, sizeof(gh->gh_nodename));
+
+    err = getaddrinfo(server, NULL, NULL, &res);
+    if (err) {
+        if (err != EAI_SYSTEM)
+            warnx("getaddrinfo: %s", gai_strerror(err));
+        else
+            warn("getaddrinfo: %s", gai_strerror(err));
+        /* Nothing was allocated; make sure the cleanup below sees that. */
+        res = NULL;
+        goto out;
+    }
+
+    /* Pick the first IPv4 stream address. */
+    for (ai = res; ai; ai = ai->ai_next) {
+        if (ai->ai_socktype != SOCK_STREAM)
+            continue;
+        if (ai->ai_family == AF_INET)
+            break;
+    }
+
+    if (ai == NULL)
+        goto out;
+
+    gh->gh_sin.sin_family = ai->ai_family;
+    gh->gh_sin.sin_port = htons(port);
+    memcpy(&gh->gh_sin.sin_addr,
+           &((struct sockaddr_in *)ai->ai_addr)->sin_addr,
+           sizeof(gh->gh_sin.sin_addr));
+
+    err = kill_gserv(gh);
+    if (err) {
+        warnx("gnbd_kill_gserv");
+        goto out;
+    }
+
+    freeaddrinfo(res);
+    return gh;
+ out:
+    free(gh);
+    if (res != NULL)
+        freeaddrinfo(res);
+    return NULL;
+}
--- /dev/null
+/* libgnbd.h
+ *
+ * gnbd client library
+ *
+ * Copyright (c) 2005, Christian Limpach
+ */
+
+#ifndef __LIBGNBD_H__
+#define __LIBGNBD_H__
+
+/* Make the header self-contained. */
+#include <stdint.h>     /* uint64_t */
+#include <sys/types.h>  /* ssize_t */
+
+/* Status codes returned by gnbd_reply(); chosen above the errno range so
+ * they can share the return value with error numbers. */
+#define GNBD_LOGIN_DONE 0x10001
+#define GNBD_REQUEST_DONE 0x10002
+#define GNBD_CONTINUE 0x10003
+#define GNBD_CONTINUE_WRITE 0x10004
+
+/* Opaque connection handle; created by gnbd_setup(), freed by gnbd_close(). */
+struct gnbd_handle;
+int gnbd_close(struct gnbd_handle *);
+int gnbd_fd(struct gnbd_handle *);
+unsigned long gnbd_finished_request(struct gnbd_handle *);
+/* NOTE(review): libgnbd.c only defines static kill_gserv()/login(); the two
+ * declarations below appear to have no matching definition -- confirm. */
+int gnbd_kill_gserv(struct gnbd_handle *);
+int gnbd_login(struct gnbd_handle *);
+/* Arguments: handle, start sector, sector count, buffer, caller cookie. */
+int gnbd_read(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *,
+    unsigned long);
+int gnbd_write(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *,
+    unsigned long);
+int gnbd_reply(struct gnbd_handle *);
+uint64_t gnbd_sectors(struct gnbd_handle *);
+/* Arguments: server name, port, device name, node name. */
+struct gnbd_handle *gnbd_setup(char *, unsigned int, char *, char *);
+
+#endif /* __LIBGNBD_H__ */
Blkctl.block('unbind', self.type, self.node)
def setNode(self, node):
+
+ # NOTE:
+ # This clause is testing code for storage system experiments.
+ # Add a new disk type that will just pass an opaque id in the
+ # start_sector and use an experimental device type.
+ # Please contact andrew.warfield@cl.cam.ac.uk with any concerns.
+ if self.type == 'amorfs':
+ self.node = node
+ self.device = 61440 # (240,0)
+ self.start_sector = long(self.params)
+ self.nr_sectors = long(0)
+ return
+ # done.
+
mounted_mode = check_mounted(self, node)
if not '!' in self.mode and mounted_mode:
if mounted_mode is "w":
#ifndef __XEN_PUBLIC_IO_BLKIF_H__
#define __XEN_PUBLIC_IO_BLKIF_H__
-#include <asm-xen/xen-public/io/ring.h>
+#include "ring.h"
#define blkif_vdev_t u16
#define blkif_sector_t u64